Integrated Laplace and merged Merge remote-tracking branch 'gpy_real/devel' into merge_branch

Conflicts:
	GPy/core/gp.py
	GPy/likelihoods/__init__.py
	GPy/likelihoods/likelihood_functions.py
	GPy/likelihoods/link_functions.py
This commit is contained in:
Alan Saul 2013-10-03 16:52:02 +01:00
commit 8343615098
106 changed files with 5841 additions and 1134 deletions

8
GPy/FAQ.txt Normal file
View file

@ -0,0 +1,8 @@
Frequently Asked Questions
--------------------------
Unit tests are run through Travis-CI. They can be run locally by entering the GPy root directory and running
nosetests testing/
Documentation is handled by Sphinx. To build the documentation:

View file

@ -0,0 +1,10 @@
In this text document we will describe coding conventions to be used in GPy to keep things consistent.
All arrays containing data are two dimensional. The first dimension is the number of data, the second dimension is the number of features. This keeps things consistent with the idea of a design matrix.
Input matrices are either X or t, output matrices are Y.
Input dimensionality is input_dim, output dimensionality is output_dim, number of data is num_data.
Data sets are preprocessed in the datasets.py file. This file also records where the data set was obtained from in the dictionary stored in the file. Long term we should move this dictionary to sqlite or similar.

View file

@ -11,25 +11,27 @@ from sparse_gp import SparseGP
class FITC(SparseGP): class FITC(SparseGP):
""" """
sparse FITC approximation
Sparse FITC approximation
:param X: inputs :param X: inputs
:type X: np.ndarray (num_data x Q) :type X: np.ndarray (num_data x Q)
:param likelihood: a likelihood instance, containing the observed data :param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP) :type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel (covariance function). See link kernels :param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance :type kernel: a GPy.kern.kern instance
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None :type Z: np.ndarray (M x Q) | None
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """
def __init__(self, X, likelihood, kernel, Z, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False) SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs" assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
def update_likelihood_approximation(self): def update_likelihood_approximation(self, **kwargs):
""" """
Approximates a non-Gaussian likelihood using Expectation Propagation Approximates a non-Gaussian likelihood using Expectation Propagation
@ -37,7 +39,7 @@ class FITC(SparseGP):
this function does nothing this function does nothing
""" """
self.likelihood.restart() self.likelihood.restart()
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
self._set_params(self._get_params()) self._set_params(self._get_params())
def _compute_kernel_matrices(self): def _compute_kernel_matrices(self):
@ -120,7 +122,7 @@ class FITC(SparseGP):
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z) self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z) self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z) self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:]) self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood # the partial derivative vector for the likelihood
@ -140,7 +142,6 @@ class FITC(SparseGP):
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise) dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T) dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star) alpha = mdot(LBiLmipsi1,self.V_star)
@ -174,7 +175,7 @@ class FITC(SparseGP):
def dL_dZ(self): def dL_dZ(self):
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X) dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z) dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX dL_dZ += self._dKmm_dX
return dL_dZ return dL_dZ

View file

@ -15,20 +15,17 @@ class GP(GPBase):
:param X: input observations :param X: input observations
:param kernel: a GPy kernel, defaults to rbf+white :param kernel: a GPy kernel, defaults to rbf+white
:parm likelihood: a GPy likelihood :param likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True :type normalize_X: False|True
:rtype: model object :rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
:type powerep: list
.. Note:: Multiple independent outputs are allowed using columns of Y .. Note:: Multiple independent outputs are allowed using columns of Y
""" """
def __init__(self, X, likelihood, kernel, normalize_X=False): def __init__(self, X, likelihood, kernel, normalize_X=False):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
self._set_params(self._get_params()) self.update_likelihood_approximation()
def getstate(self): def getstate(self):
return GPBase.getstate(self) return GPBase.getstate(self)
@ -38,15 +35,19 @@ class GP(GPBase):
self._set_params(self._get_params()) self._set_params(self._get_params())
def _set_params(self, p): def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.num_params_transformed()]) new_kern_params = p[:self.kern.num_params_transformed()]
self.likelihood._set_params(p[self.kern.num_params_transformed():]) new_likelihood_params = p[self.kern.num_params_transformed():]
old_likelihood_params = self.likelihood._get_params()
#TODO: Need to get rid of this check and think of a nicer OO way self.kern._set_params_transformed(new_kern_params)
if isinstance(self.likelihood, Laplace): self.likelihood._set_params_transformed(new_likelihood_params)
self.likelihood.fit_full(self.kern.K(self.X))
self.likelihood._set_params(self.likelihood._get_params())
self.K = self.kern.K(self.X) self.K = self.kern.K(self.X)
#Re fit likelihood approximation (if it is an approx), as parameters have changed
if isinstance(self.likelihood, Laplace):
self.likelihood.fit_full(self.K)
self.K += self.likelihood.covariance_matrix self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K) self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
@ -63,6 +64,10 @@ class GP(GPBase):
tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1) tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki) self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
#Adding dZ_dK (0 for a non-approximate likelihood, compensates for
#additional gradients of K when log-likelihood has non-zero Z term)
self.dL_dK += self.likelihood.dZ_dK
def _get_params(self): def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
@ -70,7 +75,7 @@ class GP(GPBase):
def _get_param_names(self): def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names() return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self): def update_likelihood_approximation(self, **kwargs):
""" """
Approximates a non-gaussian likelihood using Expectation Propagation Approximates a non-gaussian likelihood using Expectation Propagation
@ -78,7 +83,7 @@ class GP(GPBase):
this function does nothing this function does nothing
""" """
self.likelihood.restart() self.likelihood.restart()
self.likelihood.fit_full(self.kern.K(self.X)) self.likelihood.fit_full(self.kern.K(self.X), **kwargs)
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
def _model_fit_term(self): def _model_fit_term(self):
@ -103,25 +108,13 @@ class GP(GPBase):
return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) - return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z) 0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
""" """
The gradient of all parameters. The gradient of all parameters.
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
""" """
dL_dthetaK = self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X) return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
#Think of OO way of doing this also
if isinstance(self.likelihood, Laplace):
#self.likelihood.fit_full(self.kern.K(self.X))
#self.likelihood._set_params(self.likelihood._get_params())
dK_dthetaK = self.kern.dK_dtheta
dL_dthetaK = self.likelihood._Kgradients(dK_dthetaK, self.X.copy())
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
else:
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
return np.hstack((dL_dthetaK, dL_dthetaL))
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False): def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
""" """
@ -146,17 +139,16 @@ class GP(GPBase):
def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()): def predict(self, Xnew, which_parts='all', full_cov=False, likelihood_args=dict()):
""" """
Predict the function(s) at the new point(s) Xnew. Predict the function(s) at the new point(s) Xnew.
Arguments
---------
:param Xnew: The points at which to make a prediction :param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim :type Xnew: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction :param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools) :type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal :param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool :type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim :returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
@ -169,5 +161,69 @@ class GP(GPBase):
# now push through likelihood # now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args) mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
return mean, var, _025pm, _975pm return mean, var, _025pm, _975pm
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
    """
    For a specific output, predict the function at the new point(s) Xnew.

    :param Xnew: The points at which to make a prediction (without the output-index column)
    :type Xnew: np.ndarray, Nnew x self.input_dim
    :param output: output to predict
    :type output: integer in {0,..., num_outputs-1}
    :param which_parts: specifies which outputs kernel(s) to use in prediction
    :type which_parts: ('all', list of bools)
    :param full_cov: whether to return the full covariance matrix, or just the diagonal
    :type full_cov: bool
    :returns: mean: posterior mean as returned by the likelihood's predictive_values
    :returns: var: posterior variance as returned by the likelihood's predictive_values
    :returns: lower and upper boundaries of the 95% confidence intervals

    .. Note:: For multiple output models only
    """
    assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'

    # Append exactly ONE index column identifying the requested output.
    # (np.ones_like(Xnew) would append as many index columns as Xnew has
    # input columns, which is only correct for one-dimensional inputs.)
    index = np.ones((Xnew.shape[0], 1)) * output
    Xnew = np.hstack((Xnew, index))

    # normalize X values
    # NOTE(review): the normalization is applied after the index column has
    # been appended, so this presumably relies on _Xoffset/_Xscale leaving the
    # index column unchanged -- confirm against the multioutput model setup.
    Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
    mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)

    # now push through likelihood, selecting the noise model of this output
    mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
    return mean, var, _025pm, _975pm
def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False,stop=False):
    """
    Internal helper function for making predictions for a specific output,
    does not account for normalization or likelihood

    :param _Xnew: The points at which to make a prediction (without the output-index column)
    :type _Xnew: np.ndarray, Nnew x self.input_dim
    :param output: output to predict
    :type output: integer in {0,..., num_outputs-1}
    :param which_parts: specifies which outputs kernel(s) to use in prediction
    :type which_parts: ('all', list of bools)
    :param full_cov: whether to return the full covariance matrix, or just the diagonal
    :type full_cov: bool
    :param stop: debugging hook; when True a deliberately undefined name is
        evaluated just before returning, which raises a NameError
    :type stop: bool
    :returns: mu: predictive mean, Nnew x (number of columns of self.likelihood.Y)
    :returns: var: predictive variance, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise

    .. Note:: For multiple output models only
    """
    assert hasattr(self,'multioutput'), 'This function is for multiple output models only.'

    # Create a single index column and append it to _Xnew.
    # (np.ones_like(_Xnew) would append one index column per input column,
    # which is only correct for one-dimensional inputs.)
    index = np.ones((_Xnew.shape[0], 1)) * output
    _Xnew = np.hstack((_Xnew, index))

    # Cross-covariance between training inputs and test inputs, solved
    # against the Cholesky factor self.L
    Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
    KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
    mu = np.dot(KiKx.T, self.likelihood.Y)

    if full_cov:
        Kxx = self.kern.K(_Xnew, which_parts=which_parts)
        var = Kxx - np.dot(KiKx.T, Kx)
    else:
        Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
        var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
        var = var[:, None]

    if stop:
        # Intentionally undefined name: raises NameError so the state can be
        # inspected in a post-mortem debugger.
        debug_this # @UndefinedVariable
    return mu, var

View file

@ -57,18 +57,12 @@ class GPBase(Model):
self.X = state.pop() self.X = state.pop()
Model.setstate(self, state) Model.setstate(self, state)
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None): def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None,output=None):
""" """
Plot the GP's view of the world, where the data is normalized and the Plot the GP's view of the world, where the data is normalized and the
likelihood is Gaussian.
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations. - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function - In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO! - Not implemented in higher dimensions
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
:param samples: the number of a posteriori samples to plot :param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
@ -85,6 +79,8 @@ class GPBase(Model):
:param ax: axes to plot on. :param ax: axes to plot on.
:type ax: axes handle :type ax: axes handle
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
""" """
if which_data == 'all': if which_data == 'all':
which_data = slice(None) which_data = slice(None)
@ -93,6 +89,8 @@ class GPBase(Model):
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
if not hasattr(self,'multioutput'):
if self.X.shape[1] == 1: if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits) Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0: if samples == 0:
@ -101,16 +99,22 @@ class GPBase(Model):
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
else: else:
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True) m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
Ysim = np.random.multivariate_normal(m.flatten(), v, samples) Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax) gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples): for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25) ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
ax.set_xlim(xmin, xmax) ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None]))) ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax) ax.set_ylim(ymin, ymax)
if hasattr(self,'Z'):
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2: elif self.X.shape[1] == 2:
resolution = resolution or 50 resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution) Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
@ -120,17 +124,54 @@ class GPBase(Model):
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1]) ax.set_ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
else:
assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
if self.X.shape[1] == 2:
Xu = self.X[self.X[:,-1]==output ,0:1]
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
if samples == 0:
m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
else:
m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples):
ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
ax.set_xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
else: else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions" raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): if hasattr(self,'Z'):
Zu = self.Z[self.Z[:,-1]==output,:]
Zu = self.Z * self._Xscale + self._Xoffset
Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
""" """
Plot the GP with noise where the likelihood is Gaussian. Plot the GP with noise where the likelihood is Gaussian.
Plot the posterior of the GP. Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations. - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function - In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, we've no implemented this yet !TODO! - Not implemented in higher dimensions
Can plot only part of the data and part of the posterior functions Can plot only part of the data and part of the posterior functions
using which_data and which_functions using which_data and which_functions
@ -151,15 +192,13 @@ class GPBase(Model):
:type fignum: figure number :type fignum: figure number
:param ax: axes to plot on. :param ax: axes to plot on.
:type ax: axes handle :type ax: axes handle
:type output: integer (first output is 0)
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples :type fixed_inputs: a list of tuples
:param linecol: color of line to plot. :param linecol: color of line to plot.
:type linecol: :type linecol:
:param fillcol: color of fill :param fillcol: color of fill
:type fillcol: :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:param levels: for 2D plotting, the number of contour levels to use
is ax is None, create a new figure
""" """
# TODO include samples # TODO include samples
if which_data == 'all': if which_data == 'all':
@ -169,11 +208,13 @@ class GPBase(Model):
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
if not hasattr(self,'multioutput'):
plotdims = self.input_dim - len(fixed_inputs) plotdims = self.input_dim - len(fixed_inputs)
if plotdims == 1: if plotdims == 1:
resolution = resolution or 200
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
fixed_dims = np.array([i for i,v in fixed_inputs]) fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims) freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
@ -193,17 +234,72 @@ class GPBase(Model):
ax.set_xlim(xmin, xmax) ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax) ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2: # FIXME
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits,resolution=resolution)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 2:
resolution = resolution or 50 resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T m = m.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
Yf = self.likelihood.data.flatten() Yf = self.likelihood.Y.flatten()
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1]) ax.set_ylim(xmin[1], xmax[1])
else: else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions" raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
else:
assert len(self.likelihood.noise_model_list) > output, 'The model has only %s outputs.' %self.num_outputs
if self.X.shape[1] == 2:
resolution = resolution or 200
Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
Xu = self.X * self._Xscale + self._Xoffset
Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
elif self.X.shape[1] == 3:
raise NotImplementedError, "Plots not yet implemented for multioutput models with 2D inputs"
resolution = resolution or 50
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
"""
def samples_f(self,X,samples=10, which_data='all', which_parts='all',output=None):
if which_data == 'all':
which_data = slice(None)
if hasattr(self,'multioutput'):
np.hstack([X,np.ones((X.shape[0],1))*output])
m, v = self._raw_predict(X, which_parts=which_parts, full_cov=True)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
#gpplot(X, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax)
for i in range(samples):
ax.plot(X, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
"""

View file

@ -49,6 +49,7 @@ class Mapping(Parameterized):
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']): def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
""" """
Plot the mapping. Plot the mapping.
Plots the mapping associated with the model. Plots the mapping associated with the model.
@ -79,8 +80,7 @@ class Mapping(Parameterized):
:type fixed_inputs: a list of tuples :type fixed_inputs: a list of tuples
:param linecol: color of line to plot. :param linecol: color of line to plot.
:type linecol: :type linecol:
:param levels: for 2D plotting, the number of contour levels to use :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
is ax is None, create a new figure
""" """
# TODO include samples # TODO include samples

View file

@ -1,4 +1,4 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
@ -31,8 +31,8 @@ class Model(Parameterized):
def getstate(self): def getstate(self):
""" """
Get the current state of the class. Get the current state of the class.
Inherited from Parameterized, so add those parameters to the state Inherited from Parameterized, so add those parameters to the state
:return: list of states from the model. :return: list of states from the model.
""" """
@ -47,6 +47,7 @@ class Model(Parameterized):
:param state: the state of the model. :param state: the state of the model.
:type state: list as returned from getstate. :type state: list as returned from getstate.
""" """
self.preferred_optimizer = state.pop() self.preferred_optimizer = state.pop()
self.sampling_runs = state.pop() self.sampling_runs = state.pop()
@ -56,10 +57,11 @@ class Model(Parameterized):
def set_prior(self, regexp, what): def set_prior(self, regexp, what):
""" """
Sets priors on the model parameters. Sets priors on the model parameters.
Notes **Notes**
-----
Asserts that the prior is suitable for the constraint. If the Asserts that the prior is suitable for the constraint. If the
wrong constraint is in place, an error is raised. If no wrong constraint is in place, an error is raised. If no
constraint is in place, one is added (warning printed). constraint is in place, one is added (warning printed).
@ -185,8 +187,8 @@ class Model(Parameterized):
be handled silently. If _all_ runs fail, the model is reset to the be handled silently. If _all_ runs fail, the model is reset to the
existing parameter values. existing parameter values.
Notes **Notes**
-----
:param num_restarts: number of restarts to use (default 10) :param num_restarts: number of restarts to use (default 10)
:type num_restarts: int :type num_restarts: int
:param robust: whether to handle exceptions silently or not (default False) :param robust: whether to handle exceptions silently or not (default False)
@ -195,7 +197,9 @@ class Model(Parameterized):
:type parallel: bool :type parallel: bool
:param num_processes: number of workers in the multiprocessing pool :param num_processes: number of workers in the multiprocessing pool
:type numprocesses: int :type numprocesses: int
**kwargs are passed to the optimizer. They can be:
\*\*kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations :param max_f_eval: maximum number of function evaluations
:type max_f_eval: int :type max_f_eval: int
:param max_iters: maximum number of iterations :param max_iters: maximum number of iterations
@ -203,9 +207,7 @@ class Model(Parameterized):
:param messages: whether to display during optimisation :param messages: whether to display during optimisation
:type messages: bool :type messages: bool
..Note: If num_processes is None, the number of workes in the multiprocessing pool is automatically .. note:: If num_processes is None, the number of workes in the multiprocessing pool is automatically set to the number of processors on the current machine.
set to the number of processors on the current machine.
""" """
initial_parameters = self._get_params_transformed() initial_parameters = self._get_params_transformed()
@ -397,17 +399,20 @@ class Model(Parameterized):
return np.nan return np.nan
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
def __str__(self, names=None): def __str__(self):
if names is None: s = Parameterized.__str__(self).split('\n')
names = self._get_print_names() #def __str__(self, names=None):
s = Parameterized.__str__(self, names=names).split('\n') # if names is None:
# names = self._get_print_names()
#s = Parameterized.__str__(self, names=names).split('\n')
# add priors to the string # add priors to the string
if self.priors is not None: if self.priors is not None:
strs = [str(p) if p is not None else '' for p in self.priors] strs = [str(p) if p is not None else '' for p in self.priors]
else: else:
strs = [''] * len(self._get_param_names()) strs = [''] * len(self._get_params())
name_indices = self.grep_param_names("|".join(names)) # strs = [''] * len(self._get_param_names())
strs = np.array(strs)[name_indices] # name_indices = self.grep_param_names("|".join(names))
# strs = np.array(strs)[name_indices]
width = np.array(max([len(p) for p in strs] + [5])) + 4 width = np.array(max([len(p) for p in strs] + [5])) + 4
log_like = self.log_likelihood() log_like = self.log_likelihood()
@ -456,7 +461,7 @@ class Model(Parameterized):
gradient = self.objective_function_gradients(x) gradient = self.objective_function_gradients(x)
numerical_gradient = (f1 - f2) / (2 * dx) numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient)) global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance) return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
else: else:
@ -496,7 +501,7 @@ class Model(Parameterized):
gradient = self.objective_function_gradients(x)[i] gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1 - f2) / (2 * step) numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient) ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
difference = np.abs((f1 - f2) / 2 / step - gradient) difference = np.abs((f1 - f2) / 2 / step - gradient)
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance: if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
@ -535,22 +540,17 @@ class Model(Parameterized):
return k.variances return k.variances
def pseudo_EM(self, epsilon=.1, **kwargs): def pseudo_EM(self, stop_crit=.1, **kwargs):
""" """
TODO: Should this not bein the GP class?
EM - like algorithm for Expectation Propagation and Laplace approximation EM - like algorithm for Expectation Propagation and Laplace approximation
kwargs are passed to the optimize function. They can be: :param stop_crit: convergence criterion
:type stop_crit: float
:epsilon: convergence criterion
:max_f_eval: maximum number of function evaluations
:messages: whether to display during optimisation
:param optimzer: whice optimizer to use (defaults to self.preferred optimizer)
:type optimzer: string TODO: valid strings?
.. Note: kwargs are passed to update_likelihood and optimize functions.
""" """
assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods" assert isinstance(self.likelihood, (likelihoods.EP, likelihoods.EP_Mixed_Noise, likelihoods.Laplace)), "pseudo_EM is only available for approximate likelihoods"
ll_change = epsilon + 1. ll_change = stop_crit + 1.
iteration = 0 iteration = 0
last_ll = -np.inf last_ll = -np.inf
@ -558,9 +558,24 @@ class Model(Parameterized):
alpha = 0 alpha = 0
stop = False stop = False
#Handle **kwargs
ep_args = {}
for arg in kwargs.keys():
if arg in ('epsilon','power_ep'):
ep_args[arg] = kwargs[arg]
del kwargs[arg]
while not stop: while not stop:
last_approximation = self.likelihood.copy() last_approximation = self.likelihood.copy()
last_params = self._get_params() last_params = self._get_params()
if len(ep_args) == 2:
self.update_likelihood_approximation(epsilon=ep_args['epsilon'],power_ep=ep_args['power_ep'])
elif len(ep_args) == 1:
if ep_args.keys()[0] == 'epsilon':
self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
elif ep_args.keys()[0] == 'power_ep':
self.update_likelihood_approximation(power_ep=ep_args['power_ep'])
else:
self.update_likelihood_approximation() self.update_likelihood_approximation()
new_ll = self.log_likelihood() new_ll = self.log_likelihood()
ll_change = new_ll - last_ll ll_change = new_ll - last_ll
@ -573,7 +588,7 @@ class Model(Parameterized):
else: else:
self.optimize(**kwargs) self.optimize(**kwargs)
last_ll = self.log_likelihood() last_ll = self.log_likelihood()
if ll_change < epsilon: if ll_change < stop_crit:
stop = True stop = True
iteration += 1 iteration += 1
if stop: if stop:

View file

@ -27,9 +27,9 @@ class Parameterized(object):
def _get_param_names(self): def _get_param_names(self):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class" raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _get_print_names(self): #def _get_print_names(self):
""" Override for which names to print out, when using print m """ # """ Override for which names to print out, when using print m """
return self._get_param_names() # return self._get_param_names()
def pickle(self, filename, protocol=None): def pickle(self, filename, protocol=None):
if protocol is None: if protocol is None:
@ -63,8 +63,8 @@ class Parameterized(object):
""" """
Get the current state of the class, Get the current state of the class,
here just all the indices, rest can get recomputed here just all the indices, rest can get recomputed
For inheriting from Parameterized: For inheriting from Parameterized:
Allways append the state of the inherited object Allways append the state of the inherited object
and call down to the inherited object in setstate!! and call down to the inherited object in setstate!!
""" """
@ -231,17 +231,19 @@ class Parameterized(object):
def constrain_fixed(self, regexp, value=None): def constrain_fixed(self, regexp, value=None):
""" """
Arguments
---------
:param regexp: which parameters need to be fixed. :param regexp: which parameters need to be fixed.
:type regexp: ndarray(dtype=int) or regular expression object or string :type regexp: ndarray(dtype=int) or regular expression object or string
:param value: the vlaue to fix the parameters to. If the value is not specified, :param value: the vlaue to fix the parameters to. If the value is not specified,
the parameter is fixed to the current value the parameter is fixed to the current value
:type value: float :type value: float
Notes
----- **Notes**
Fixing a parameter which is tied to another, or constrained in some way will result in an error. Fixing a parameter which is tied to another, or constrained in some way will result in an error.
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes.
""" """
matches = self.grep_param_names(regexp) matches = self.grep_param_names(regexp)
overlap = set(matches).intersection(set(self.all_constrained_indices())) overlap = set(matches).intersection(set(self.all_constrained_indices()))
@ -336,26 +338,30 @@ class Parameterized(object):
n = [nn for i, nn in enumerate(n) if not i in remove] n = [nn for i, nn in enumerate(n) if not i in remove]
return n return n
@property #@property
def all(self): #def all(self):
return self.__str__(self._get_param_names()) # return self.__str__(self._get_param_names())
def __str__(self, names=None, nw=30): #def __str__(self, names=None, nw=30):
def __str__(self, nw=30):
""" """
Return a string describing the parameter names and their ties and constraints Return a string describing the parameter names and their ties and constraints
""" """
if names is None: names = self._get_param_names()
names = self._get_print_names() #if names is None:
name_indices = self.grep_param_names("|".join(names)) # names = self._get_print_names()
#name_indices = self.grep_param_names("|".join(names))
N = len(names) N = len(names)
if not N: if not N:
return "This object has no free parameters." return "This object has no free parameters."
header = ['Name', 'Value', 'Constraints', 'Ties'] header = ['Name', 'Value', 'Constraints', 'Ties']
values = self._get_params()[name_indices] # map(str,self._get_params()) values = self._get_params() # map(str,self._get_params())
#values = self._get_params()[name_indices] # map(str,self._get_params())
# sort out the constraints # sort out the constraints
constraints = [''] * len(self._get_param_names()) constraints = [''] * len(names)
#constraints = [''] * len(self._get_param_names())
for i, t in zip(self.constrained_indices, self.constraints): for i, t in zip(self.constrained_indices, self.constraints):
for ii in i: for ii in i:
constraints[ii] = t.__str__() constraints[ii] = t.__str__()
@ -368,6 +374,9 @@ class Parameterized(object):
for j in tie: for j in tie:
ties[j] = '(' + str(i) + ')' ties[j] = '(' + str(i) + ')'
if values.size == 1:
values = ['%.4f' %float(values)]
else:
values = ['%.4f' % float(v) for v in values] values = ['%.4f' % float(v) for v in values]
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])]) max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])]) max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
@ -383,3 +392,77 @@ class Parameterized(object):
return ('\n'.join([header_string[0], separator] + param_string)) + '\n' return ('\n'.join([header_string[0], separator] + param_string)) + '\n'
def grep_model(self, regexp):
    """
    Print a table describing only the parameters selected by regexp.

    The table has the columns Name, Value, Constraints and Ties, one row
    per parameter index returned by ``self.grep_param_names(regexp)``.

    :param regexp: selector handed unchanged to ``self.grep_param_names``
    :returns: the string "Match not found." when nothing is selected,
        otherwise None (the table is printed to standard output)
    """
    regexp_indices = self.grep_param_names(regexp)
    all_names = self._get_param_names()
    names = [all_names[pj] for pj in regexp_indices]
    if not names:
        return "Match not found."

    header = ['Name', 'Value', 'Constraints', 'Ties']

    # Map every selected (global) parameter index to its row in the table.
    # setdefault keeps the first occurrence, like list.index() would.
    row_of = {}
    for row, pj in enumerate(regexp_indices):
        row_of.setdefault(int(pj), row)

    all_values = self._get_params()
    values = ['%.4f' % float(all_values[pj]) for pj in regexp_indices]

    # Constraints, restricted to the selected parameters.
    constraints = [''] * len(names)
    _constrained_indices, aux = self._pick_elements(regexp_indices, self.constrained_indices)
    _constraints = [self.constraints[pj] for pj in aux]
    for i, t in zip(_constrained_indices, _constraints):
        for ii in i:
            constraints[row_of[int(ii)]] = t.__str__()

    # Fixed parameters.  The table row must be used here, not the global
    # parameter index: the table only holds the selected parameters.
    _fixed_indices, aux = self._pick_elements(regexp_indices, self.fixed_indices)
    for i in _fixed_indices:
        for ii in i:
            constraints[row_of[int(ii)]] = 'Fixed'

    # Ties are labelled with the position of the tie group in self.tied_indices.
    _tied_indices, aux = self._pick_elements(regexp_indices, self.tied_indices)
    ties = [''] * len(names)
    for i, ti in zip(_tied_indices, aux):
        for ii in i:
            ties[row_of[int(ii)]] = '(' + str(ti) + ')'

    # Column width = widest cell (or the header) plus four characters of padding.
    columns = [names, values, constraints, ties]
    cols = [max([len(cell) for cell in column] + [len(title)]) + 4
            for column, title in zip(columns, header)]

    header_line = '|'.join(["{h:^{col}}".format(h=title, col=width)
                            for title, width in zip(header, cols)])
    separator = '-' * len(header_line)
    param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(
        n=names[i], v=values[i], c=constraints[i], t=ties[i],
        c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(header_line)
    print(separator)
    for string in param_string:
        print(string)
def _pick_elements(self,regexp_ind,array_list):
"""Removes from array_list the elements different from regexp_ind"""
new_array_list = [] #New list with elements matching regexp_ind
array_indices = [] #Indices that matches the arrays in new_array_list and array_list
array_index = 0
for array in array_list:
_new = []
for ai in array:
if ai in regexp_ind:
_new.append(ai)
if len(_new):
new_array_list.append(np.array(_new))
array_indices.append(array_index)
array_index += 1
return new_array_list, array_indices

View file

@ -5,7 +5,7 @@ import numpy as np
import pylab as pb import pylab as pb
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
from scipy import linalg from scipy import linalg
from ..likelihoods import Gaussian from ..likelihoods import Gaussian, EP,EP_Mixed_Noise
from gp_base import GPBase from gp_base import GPBase
class SparseGP(GPBase): class SparseGP(GPBase):
@ -16,16 +16,17 @@ class SparseGP(GPBase):
:type X: np.ndarray (num_data x input_dim) :type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data :param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel : the kernel (covariance function). See link kernels :param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance :type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance) :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None :type X_variance: np.ndarray (num_data x input_dim) | None
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None :type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None) :param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type num_inducing: int :type num_inducing: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
@ -109,7 +110,6 @@ class SparseGP(GPBase):
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1) tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
self._A = tdot(tmp) self._A = tdot(tmp)
# factor B # factor B
self.B = np.eye(self.num_inducing) + self._A self.B = np.eye(self.num_inducing) + self._A
self.LB = jitchol(self.B) self.LB = jitchol(self.B)
@ -139,6 +139,7 @@ class SparseGP(GPBase):
dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi) dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :] self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else: else:
@ -160,9 +161,27 @@ class SparseGP(GPBase):
# save computation here. # save computation here.
self.partial_for_likelihood = None self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic: elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented"
if self.has_uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
else: else:
# likelihood is not heterscedatic
LBi = chol_inv(self.LB)
Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
self.partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*self.likelihood.precision**2
self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
else:
# likelihood is not heteroscedatic
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision) self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
@ -194,10 +213,10 @@ class SparseGP(GPBase):
return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\ return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\
+ self.kern._get_param_names_transformed() + self.likelihood._get_param_names() + self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def _get_print_names(self): #def _get_print_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names() # return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self): def update_likelihood_approximation(self, **kwargs):
""" """
Approximates a non-gaussian likelihood using Expectation Propagation Approximates a non-gaussian likelihood using Expectation Propagation
@ -211,10 +230,10 @@ class SparseGP(GPBase):
Kmmi = tdot(Lmi.T) Kmmi = tdot(Lmi.T)
diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)]) diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])
self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi, **kwargs) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion
# raise NotImplementedError, "EP approximation not implemented for uncertain inputs" # raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else: else:
self.likelihood.fit_DTC(self.Kmm, self.psi1.T) self.likelihood.fit_DTC(self.Kmm, self.psi1.T, **kwargs)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP self._set_params(self._get_params()) # update the GP
@ -240,7 +259,7 @@ class SparseGP(GPBase):
""" """
The derivative of the bound wrt the inducing inputs Z The derivative of the bound wrt the inducing inputs Z
""" """
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance) dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance) dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
@ -274,7 +293,7 @@ class SparseGP(GPBase):
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
else: else:
# assert which_p.Tarts=='all', "swithching out parts of variational kernels is not implemented" # assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
mu = np.dot(Kx, self.Cpsi1V) mu = np.dot(Kx, self.Cpsi1V)
if full_cov: if full_cov:
@ -288,17 +307,18 @@ class SparseGP(GPBase):
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
""" """
Predict the function(s) at the new point(s) Xnew. Predict the function(s) at the new point(s) Xnew.
Arguments **Arguments**
---------
:param Xnew: The points at which to make a prediction :param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim :type Xnew: np.ndarray, Nnew x self.input_dim
:param X_variance_new: The uncertainty in the prediction points :param X_variance_new: The uncertainty in the prediction points
:type X_variance_new: np.ndarray, Nnew x self.input_dim :type X_variance_new: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction :param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools) :type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal :param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool :type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@ -322,17 +342,17 @@ class SparseGP(GPBase):
return mean, var, _025pm, _975pm return mean, var, _025pm, _975pm
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None): def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None, output=None):
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
if which_data is 'all': if which_data is 'all':
which_data = slice(None) which_data = slice(None)
GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax) GPBase.plot(self, samples=0, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax, output=output)
if not hasattr(self,'multioutput'):
# add the inducing inputs and some errorbars
if self.X.shape[1] == 1: if self.X.shape[1] == 1:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
@ -345,3 +365,108 @@ class SparseGP(GPBase):
elif self.X.shape[1] == 2: elif self.X.shape[1] == 2:
Zu = self.Z * self._Xscale + self._Xoffset Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo') ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
if self.X.shape[1] == 2 and hasattr(self,'multioutput'):
"""
Xu = self.X[self.X[:,-1]==output,:]
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
Xu = self.X[self.X[:,-1]==output ,0:1] #??
ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
"""
Zu = self.Z[self.Z[:,-1]==output,:]
Zu = self.Z * self._Xscale + self._Xoffset
Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
#ax.set_ylim(ax.get_ylim()[0],)
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
    """
    For a specific output, predict the function at the new point(s) Xnew.

    A single column holding the output index is appended to Xnew, the
    result is normalized with self._Xoffset and self._Xscale, passed to
    self._raw_predict and finally pushed through the likelihood.

    :param Xnew: The points at which to make a prediction, without the
        output-index column
    :type Xnew: np.ndarray with one row per prediction point
    :param output: output to predict
    :type output: integer in {0,..., num_outputs-1}
    :param which_parts: specifies which outputs kernel(s) to use in prediction
    :type which_parts: ('all', list of bools)
    :param full_cov: whether to return the full covariance matrix, or just the diagonal
    :type full_cov: bool
    :returns: the four values (mean, variance, lower bound, upper bound)
        produced by self.likelihood.predictive_values for this output;
        presumably the bounds are the 95% confidence interval -- confirm
        against the likelihood in use

    .. Note:: For multiple output models only
    """
    assert hasattr(self, 'multioutput'), "predict_single_output is only available for multiple output models"

    # Append exactly one index column, whatever the number of input columns.
    # np.ones_like(Xnew) would append one index column per input column.
    index = np.ones((Xnew.shape[0], 1)) * output
    Xnew = np.hstack((Xnew, index))

    # normalize X values
    Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
    mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)

    # now push through likelihood
    mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model=output)
    return mean, var, _025pm, _975pm
def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False, stop=False):
    """
    Internal helper function for making predictions for a specific output,
    does not account for normalization or likelihood

    :param _Xnew: The points at which to make a prediction, without the
        output-index column (a single index column is appended here)
    :type _Xnew: np.ndarray with one row per prediction point
    :param output: output to predict
    :type output: integer in {0,..., num_outputs-1}
    :param X_variance_new: The uncertainty in the prediction points; when
        given, the psi statistics of the kernel are used instead of K
    :param which_parts: specifies which outputs kernel(s) to use in prediction
    :type which_parts: ('all', list of bools)
    :param full_cov: whether to return the full covariance matrix, or just the diagonal
    :param stop: not used by this function
    :returns: (mu, var[:, None])
    :raises NotImplementedError: if full_cov is requested together with
        X_variance_new

    .. Note:: For multiple output models only
    """
    Bi, _ = dpotri(self.LB, lower=0)  # WTH? this lower switch should be 1, but that doesn't work!
    symmetrify(Bi)
    Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)

    # Compute Cpsi1V only if it is not already available on the model.
    if self.Cpsi1V is None:
        psi1V = np.dot(self.psi1.T, self.likelihood.V)
        tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
        tmp, _ = dpotrs(self.LB, tmp, lower=1)
        self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)

    assert hasattr(self, 'multioutput'), "_raw_predict_single_output is only available for multiple output models"

    # Append exactly one index column, whatever the number of input columns.
    # np.ones_like(_Xnew) would append one index column per input column.
    index = np.ones((_Xnew.shape[0], 1)) * output
    _Xnew = np.hstack((_Xnew, index))

    if X_variance_new is None:
        Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
        mu = np.dot(Kx.T, self.Cpsi1V)
        if full_cov:
            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
            var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx)  # NOTE this won't work for plotting
        else:
            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
            var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
    else:
        # NOTE(review): X_variance_new is passed through as given, although
        # _Xnew has gained an index column above -- confirm the kernel
        # expects this.
        Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
        mu = np.dot(Kx, self.Cpsi1V)
        if full_cov:
            raise NotImplementedError("TODO")
        else:
            Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
            psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
            var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)

    return mu, var[:, None]

View file

@ -14,6 +14,7 @@ import sys
class SVIGP(GPBase): class SVIGP(GPBase):
""" """
Stochastic Variational inference in a Gaussian Process Stochastic Variational inference in a Gaussian Process
:param X: inputs :param X: inputs
@ -22,25 +23,26 @@ class SVIGP(GPBase):
:type Y: np.ndarray of observations (N x D) :type Y: np.ndarray of observations (N x D)
:param batchsize: the size of a h :param batchsize: the size of a h
Additional kwargs are used as for a sparse GP. They include Additional kwargs are used as for a sparse GP. They include:
:param q_u: canonical parameters of the distribution squasehd into a 1D array :param q_u: canonical parameters of the distribution squasehd into a 1D array
:type q_u: np.ndarray :type q_u: np.ndarray
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int :type M: int
:param kernel : the kernel/covariance function. See link kernels :param kernel: the kernel/covariance function. See link kernels
:type kernel: a GPy kernel :type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None :type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance) :param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None :type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link) :param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None) :param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int :type M: int
:param beta: noise precision. TODO> ignore beta if doing EP :param beta: noise precision. TODO: ignore beta if doing EP
:type beta: float :type beta: float
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool :type normalize_(X|Y): bool
""" """

View file

@ -18,9 +18,11 @@ class transformation(object):
def gradfactor(self, f): def gradfactor(self, f):
""" df_dx evaluated at self.f(x)=f""" """ df_dx evaluated at self.f(x)=f"""
raise NotImplementedError raise NotImplementedError
def initialize(self, f): def initialize(self, f):
""" produce a sensible initial value for f(x)""" """ produce a sensible initial value for f(x)"""
raise NotImplementedError raise NotImplementedError
def __str__(self): def __str__(self):
raise NotImplementedError raise NotImplementedError
@ -42,15 +44,13 @@ class logexp(transformation):
class negative_logexp(transformation): class negative_logexp(transformation):
domain = NEGATIVE domain = NEGATIVE
def f(self, x): def f(self, x):
return -logexp.f(x) #np.log(1. + np.exp(x)) return -logexp.f(x)
def finv(self, f): def finv(self, f):
return logexp.finv(-f) #np.log(np.exp(-f) - 1.) return logexp.finv(-f)
def gradfactor(self, f): def gradfactor(self, f):
return -logexp.gradfactor(-f) return -logexp.gradfactor(-f)
#ef = np.exp(-f)
#return -(ef - 1.) / ef
def initialize(self, f): def initialize(self, f):
return -logexp.initialize(f) #np.abs(f) return -logexp.initialize(f)
def __str__(self): def __str__(self):
return '(-ve)' return '(-ve)'
@ -82,7 +82,6 @@ class logexp_clipped(logexp):
return '(+ve_c)' return '(+ve_c)'
class exponent(transformation): class exponent(transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative exponent below. See old MATLAB code.
domain = POSITIVE domain = POSITIVE
def f(self, x): def f(self, x):
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val)) return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))

View file

@ -10,31 +10,11 @@ import numpy as np
import GPy import GPy
default_seed = 10000 default_seed = 10000
def crescent_data(seed=default_seed, kernel=None): # FIXME
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation.
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1] = 0
m = GPy.models.GPClassification(data['X'], Y)
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM()
print(m)
m.plot()
return m
def oil(num_inducing=50, max_iters=100, kernel=None): def oil(num_inducing=50, max_iters=100, kernel=None):
""" """
Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
""" """
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
X = data['X'] X = data['X']
@ -64,8 +44,10 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
def toy_linear_1d_classification(seed=default_seed): def toy_linear_1d_classification(seed=default_seed):
""" """
Simple 1D classification example Simple 1D classification example
:param seed : seed value for data generation (default is 4).
:param seed: seed value for data generation (default is 4).
:type seed: int :type seed: int
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
@ -92,8 +74,10 @@ def toy_linear_1d_classification(seed=default_seed):
def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed): def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
""" """
Sparse 1D classification example Sparse 1D classification example
:param seed : seed value for data generation (default is 4).
:param seed: seed value for data generation (default is 4).
:type seed: int :type seed: int
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
@ -118,51 +102,67 @@ def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
return m return m
def sparse_crescent_data(num_inducing=10, seed=default_seed, kernel=None): def toy_heaviside(seed=default_seed):
""" """
Run a Gaussian process classification with DTC approxiamtion on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. Simple 1D classification example using a heavy side gp transformation
:param seed: seed value for data generation (default is 4).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
# Model definition
noise_model = GPy.likelihoods.binomial(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
likelihood = GPy.likelihoods.EP(Y,noise_model)
m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
# Optimize
m.update_likelihood_approximation()
# Parameters optimization:
m.optimize()
#m.pseudo_EM()
# Plot
fig, axes = pb.subplots(2,1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
print(m)
return m
def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None):
"""
Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC']. :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation. :param inducing: number of inducing variables (only used for 'FITC' or 'DTC').
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int :type inducing: int
:param seed: seed value for data generation.
:type seed: int
:param kernel: kernel to use in the model
:type kernel: a GPy kernel
""" """
data = GPy.util.datasets.crescent_data(seed=seed) data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y'] Y = data['Y']
Y[Y.flatten()==-1]=0 Y[Y.flatten()==-1] = 0
if model_type == 'Full':
m = GPy.models.GPClassification(data['X'], Y,kernel=kernel)
elif model_type == 'DTC':
m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing) m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
m['.*len'] = 10. m['.*len'] = 10.
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM()
print(m)
m.plot()
return m
def FITC_crescent_data(num_inducing=10, seed=default_seed): elif model_type == 'FITC':
""" m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
Run a Gaussian process classification with FITC approximation on the crescent data. The demonstration uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
:param seed : seed value for data generation.
:type seed: int
:param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type num_inducing: int
"""
data = GPy.util.datasets.crescent_data(seed=seed)
Y = data['Y']
Y[Y.flatten()==-1]=0
m = GPy.models.FITCClassification(data['X'], Y,num_inducing=num_inducing)
m.constrain_bounded('.*len',1.,1e3)
m['.*len'] = 3. m['.*len'] = 3.
#m.update_likelihood_approximation()
#m.optimize()
m.pseudo_EM() m.pseudo_EM()
print(m) print(m)
m.plot() m.plot()
return m return m

View file

@ -26,7 +26,7 @@ def timing():
edited_real_sd = real_sd edited_real_sd = real_sd
kernel1 = GPy.kern.rbf(X.shape[1]) kernel1 = GPy.kern.rbf(X.shape[1])
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm') corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
m = GPy.models.GPRegression(X, Yc.copy(), kernel1, likelihood=corrupt_stu_t_likelihood) m = GPy.models.GPRegression(X, Yc.copy(), kernel1, likelihood=corrupt_stu_t_likelihood)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -55,7 +55,7 @@ def v_fail_test():
edited_real_sd = real_sd edited_real_sd = real_sd
print "Clean student t, rasm" print "Clean student t, rasm"
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
m = GPy.models.GPRegression(X, Y.copy(), kernel1, likelihood=stu_t_likelihood) m = GPy.models.GPRegression(X, Y.copy(), kernel1, likelihood=stu_t_likelihood)
m.constrain_positive('') m.constrain_positive('')
@ -102,7 +102,7 @@ def student_t_obj_plane():
print mgp print mgp
kernelst = kernelgp.copy() kernelst = kernelgp.copy()
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=(real_std**2)) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=(real_std**2))
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood) m = GPy.models.GPRegression(X, Y, kernelst, likelihood=stu_t_likelihood)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -155,7 +155,7 @@ def student_t_f_check():
kernelst = kernelgp.copy() kernelst = kernelgp.copy()
#kernelst += GPy.kern.bias(X.shape[1]) #kernelst += GPy.kern.bias(X.shape[1])
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=0.05) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=0.05)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
m = GPy.models.GPRegression(X, Y.copy(), kernelst, likelihood=stu_t_likelihood) m = GPy.models.GPRegression(X, Y.copy(), kernelst, likelihood=stu_t_likelihood)
#m['rbf_v'] = mgp._get_params()[0] #m['rbf_v'] = mgp._get_params()[0]
@ -207,7 +207,7 @@ def student_t_fix_optimise_check():
kernelst = kernelgp.copy() kernelst = kernelgp.copy()
real_stu_t_std2 = (real_std**2)*((deg_free - 2)/float(deg_free)) real_stu_t_std2 = (real_std**2)*((deg_free - 2)/float(deg_free))
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=real_stu_t_std2) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=real_stu_t_std2)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
plt.figure(1) plt.figure(1)
@ -350,7 +350,7 @@ def debug_student_t_noise_approx():
#edited_real_sd = real_sd #edited_real_sd = real_sd
print "Clean student t, rasm" print "Clean student t, rasm"
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
m = GPy.models.GPRegression(X, Y, kernel6, likelihood=stu_t_likelihood) m = GPy.models.GPRegression(X, Y, kernel6, likelihood=stu_t_likelihood)
@ -385,7 +385,7 @@ def debug_student_t_noise_approx():
return m return m
#print "Clean student t, ncg" #print "Clean student t, ncg"
#t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
#stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg') #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
#m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3) #m = GPy.models.GPRegression(X, stu_t_likelihood, kernel3)
#m.ensure_default_constraints() #m.ensure_default_constraints()
@ -446,7 +446,7 @@ def student_t_approx():
plt.figure(1) plt.figure(1)
plt.suptitle('Gaussian likelihood') plt.suptitle('Gaussian likelihood')
# Kernel object # Kernel object
kernel1 = GPy.kern.rbf(X.shape[1]) kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel2 = kernel1.copy() kernel2 = kernel1.copy()
kernel3 = kernel1.copy() kernel3 = kernel1.copy()
kernel4 = kernel1.copy() kernel4 = kernel1.copy()
@ -487,13 +487,12 @@ def student_t_approx():
edited_real_sd = initial_var_guess edited_real_sd = initial_var_guess
print "Clean student t, rasm" print "Clean student t, rasm"
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution, opt='rasm')
m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood) m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood)
m.ensure_default_constraints() m.ensure_default_constraints()
m.constrain_positive('t_noise') m.constrain_positive('t_noise')
m.randomize() m.randomize()
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
#m.update_likelihood_approximation() #m.update_likelihood_approximation()
m.optimize() m.optimize()
print(m) print(m)
@ -504,15 +503,19 @@ def student_t_approx():
plt.title('Student-t rasm clean') plt.title('Student-t rasm clean')
print "Corrupt student t, rasm" print "Corrupt student t, rasm"
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm') corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='rasm')
m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood) m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
m.ensure_default_constraints() m.ensure_default_constraints()
m.constrain_positive('t_noise') m.constrain_positive('t_noise')
m.constrain_fixed('white', 1e-4)
m.randomize() m.randomize()
#m.update_likelihood_approximation() #m.update_likelihood_approximation()
import ipdb; ipdb.set_trace() # XXX BREAKPOINT for a in range(1):
m.optimize() m.randomize()
m_start = m.copy()
print m
m.optimize('scg', messages=1)
print(m) print(m)
ax = plt.subplot(212) ax = plt.subplot(212)
m.plot(ax=ax) m.plot(ax=ax)
@ -524,7 +527,7 @@ def student_t_approx():
return m return m
#print "Clean student t, ncg" #print "Clean student t, ncg"
#t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
#stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg') #stu_t_likelihood = GPy.likelihoods.Laplace(Y, t_distribution, opt='ncg')
#m = GPy.models.GPRegression(X, Y, kernel3, likelihood=stu_t_likelihood) #m = GPy.models.GPRegression(X, Y, kernel3, likelihood=stu_t_likelihood)
#m.ensure_default_constraints() #m.ensure_default_constraints()
@ -538,7 +541,7 @@ def student_t_approx():
#plt.title('Student-t ncg clean') #plt.title('Student-t ncg clean')
#print "Corrupt student t, ncg" #print "Corrupt student t, ncg"
#t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=edited_real_sd) #t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
#corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg') #corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution, opt='ncg')
#m = GPy.models.GPRegression(X, Y, kernel5, likelihood=corrupt_stu_t_likelihood) #m = GPy.models.GPRegression(X, Y, kernel5, likelihood=corrupt_stu_t_likelihood)
#m.ensure_default_constraints() #m.ensure_default_constraints()
@ -553,7 +556,7 @@ def student_t_approx():
###with a student t distribution, since it has heavy tails it should work well ###with a student t distribution, since it has heavy tails it should work well
###likelihood_function = student_t(deg_free, sigma2=real_var) ###likelihood_function = student_t(deg_free=deg_free, sigma2=real_var)
###lap = Laplace(Y, likelihood_function) ###lap = Laplace(Y, likelihood_function)
###cov = kernel.K(X) ###cov = kernel.K(X)
###lap.fit_full(cov) ###lap.fit_full(cov)
@ -621,7 +624,7 @@ def gaussian_f_check():
kernelg = kernelgp.copy() kernelg = kernelgp.copy()
#kernelst += GPy.kern.bias(X.shape[1]) #kernelst += GPy.kern.bias(X.shape[1])
N, D = X.shape N, D = X.shape
g_distribution = GPy.likelihoods.functions.Gaussian(variance=0.1, N=N, D=D) g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=0.1, N=N, D=D)
g_likelihood = GPy.likelihoods.Laplace(Y.copy(), g_distribution, opt='rasm') g_likelihood = GPy.likelihoods.Laplace(Y.copy(), g_distribution, opt='rasm')
m = GPy.models.GPRegression(X, Y, kernelg, likelihood=g_likelihood) m = GPy.models.GPRegression(X, Y, kernelg, likelihood=g_likelihood)
m.likelihood.X = X m.likelihood.X = X
@ -698,7 +701,7 @@ def boston_example():
print "Gaussian Laplace GP" print "Gaussian Laplace GP"
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
N, D = Y_train.shape N, D = Y_train.shape
g_distribution = GPy.likelihoods.functions.Gaussian(variance=noise, N=N, D=D) g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution, opt='rasm') g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution, opt='rasm')
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=g_likelihood) mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=g_likelihood)
mg.ensure_default_constraints() mg.ensure_default_constraints()
@ -725,7 +728,7 @@ def boston_example():
deg_free = 1 deg_free = 1
print "Student-T GP {}df".format(deg_free) print "Student-T GP {}df".format(deg_free)
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=noise) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm')
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints() mstu_t.ensure_default_constraints()
@ -751,7 +754,7 @@ def boston_example():
deg_free = 2 deg_free = 2
print "Student-T GP {}df".format(deg_free) print "Student-T GP {}df".format(deg_free)
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=noise) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm')
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints() mstu_t.ensure_default_constraints()
@ -778,7 +781,7 @@ def boston_example():
deg_free = 3 deg_free = 3
print "Student-T GP {}df".format(deg_free) print "Student-T GP {}df".format(deg_free)
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=noise) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm')
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints() mstu_t.ensure_default_constraints()
@ -804,7 +807,7 @@ def boston_example():
deg_free = 5 deg_free = 5
print "Student-T GP {}df".format(deg_free) print "Student-T GP {}df".format(deg_free)
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
t_distribution = GPy.likelihoods.functions.StudentT(deg_free, sigma2=noise) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm') stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution, opt='rasm')
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu, likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints() mstu_t.ensure_default_constraints()

View file

@ -9,9 +9,9 @@ import pylab as pb
import numpy as np import numpy as np
import GPy import GPy
def coregionalisation_toy2(max_iters=100): def coregionalization_toy2(max_iters=100):
""" """
A simple demonstration of coregionalisation on two sinusoidal functions. A simple demonstration of coregionalization on two sinusoidal functions.
""" """
X1 = np.random.rand(50, 1) * 8 X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5 X2 = np.random.rand(30, 1) * 5
@ -22,8 +22,8 @@ def coregionalisation_toy2(max_iters=100):
Y = np.vstack((Y1, Y2)) Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2, 1) k2 = GPy.kern.coregionalize(2,1)
k = k1**k2 k = k1**k2 #k = k1.prod(k2,tensor=True)
m = GPy.models.GPRegression(X, Y, kernel=k) m = GPy.models.GPRegression(X, Y, kernel=k)
m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('.*rbf_var', 1.)
# m.constrain_positive('.*kappa') # m.constrain_positive('.*kappa')
@ -40,41 +40,32 @@ def coregionalisation_toy2(max_iters=100):
pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2) pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
return m return m
def coregionalisation_toy(max_iters=100): def coregionalization_toy(max_iters=100):
""" """
A simple demonstration of coregionalisation on two sinusoidal functions. A simple demonstration of coregionalization on two sinusoidal functions.
""" """
X1 = np.random.rand(50, 1) * 8 X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5 X2 = np.random.rand(30, 1) * 5
index = np.vstack((np.zeros_like(X1), np.ones_like(X2))) X = np.vstack((X1, X2))
X = np.hstack((np.vstack((X1, X2)), index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2)) Y = np.vstack((Y1, Y2))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2, 2) m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
k = k1**k2 #k1.prod(k2, tensor=True)
m = GPy.models.GPRegression(X, Y, kernel=k)
m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('.*rbf_var', 1.)
# m.constrain_positive('kappa')
m.optimize(max_iters=max_iters) m.optimize(max_iters=max_iters)
pb.figure() fig, axes = pb.subplots(2,1)
Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1)))) m.plot(output=0,ax=axes[0])
Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1)))) m.plot(output=1,ax=axes[1])
mean, var, low, up = m.predict(Xtest1) axes[0].set_title('Output 0')
GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up) axes[1].set_title('Output 1')
mean, var, low, up = m.predict(Xtest2)
GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
return m return m
def coregionalization_sparse(max_iters=100):
def coregionalisation_sparse(max_iters=100):
""" """
A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations. A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations.
""" """
X1 = np.random.rand(500, 1) * 8 X1 = np.random.rand(500, 1) * 8
X2 = np.random.rand(300, 1) * 5 X2 = np.random.rand(300, 1) * 5
@ -84,33 +75,18 @@ def coregionalisation_sparse(max_iters=100):
Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
Y = np.vstack((Y1, Y2)) Y = np.vstack((Y1, Y2))
num_inducing = 40
Z = np.hstack((np.random.rand(num_inducing, 1) * 8, np.random.randint(0, 2, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(2, 2)
k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1],num_inducing=20)
m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('.*rbf_var',1.)
m.constrain_fixed('iip') m.optimize(messages=1)
m.constrain_bounded('noise_variance', 1e-3, 1e-1) #m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
# m.optimize_restarts(5, robust=True, messages=1, max_iters=max_iters, optimizer='bfgs')
m.optimize(max_iters=max_iters)
# plotting: fig, axes = pb.subplots(2,1)
pb.figure() m.plot(output=0,ax=axes[0])
Xtest1 = np.hstack((np.linspace(0, 9, 100)[:, None], np.zeros((100, 1)))) m.plot(output=1,ax=axes[1])
Xtest2 = np.hstack((np.linspace(0, 9, 100)[:, None], np.ones((100, 1)))) axes[0].set_title('Output 0')
mean, var, low, up = m.predict(Xtest1) axes[1].set_title('Output 1')
GPy.util.plot.gpplot(Xtest1[:, 0], mean, low, up)
mean, var, low, up = m.predict(Xtest2)
GPy.util.plot.gpplot(Xtest2[:, 0], mean, low, up)
pb.plot(X1[:, 0], Y1[:, 0], 'rx', mew=2)
pb.plot(X2[:, 0], Y2[:, 0], 'gx', mew=2)
y = pb.ylim()[0]
pb.plot(Z[:, 0][Z[:, 1] == 0], np.zeros(np.sum(Z[:, 1] == 0)) + y, 'r|', mew=2)
pb.plot(Z[:, 0][Z[:, 1] == 1], np.zeros(np.sum(Z[:, 1] == 1)) + y, 'g|', mew=2)
return m return m
def epomeo_gpx(max_iters=100): def epomeo_gpx(max_iters=100):
@ -136,7 +112,7 @@ def epomeo_gpx(max_iters=100):
np.random.randint(0, 4, num_inducing)[:, None])) np.random.randint(0, 4, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.rbf(1)
k2 = GPy.kern.coregionalise(output_dim=5, rank=5) k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
k = k1**k2 k = k1**k2
m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True) m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
@ -156,7 +132,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
length_scales = np.linspace(0.1, 60., resolution) length_scales = np.linspace(0.1, 60., resolution)
log_SNRs = np.linspace(-3., 4., resolution) log_SNRs = np.linspace(-3., 4., resolution)
data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number) data = GPy.util.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
# data['Y'] = data['Y'][0::2, :] # data['Y'] = data['Y'][0::2, :]
# data['X'] = data['X'][0::2, :] # data['X'] = data['X'][0::2, :]
@ -401,8 +377,6 @@ def silhouette(max_iters=100):
print(m) print(m)
return m return m
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100): def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
"""Run a 1D example of a sparse GP regression.""" """Run a 1D example of a sparse GP regression."""
# sample inputs and outputs # sample inputs and outputs

View file

@ -233,7 +233,7 @@ class CGD(Async_Optimize):
""" """
opt_async(self, f, df, x0, callback, update_rule=FletcherReeves, opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6, messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs) report_every=10, \*args, \*\*kwargs)
callback gets called every `report_every` iterations callback gets called every `report_every` iterations
@ -244,16 +244,14 @@ class CGD(Async_Optimize):
f, and df will be called with f, and df will be called with
f(xi, *args, **kwargs) f(xi, \*args, \*\*kwargs)
df(xi, *args, **kwargs) df(xi, \*args, \*\*kwargs)
**returns** **Returns:**
-----------
Started `Process` object, optimizing asynchronously Started `Process` object, optimizing asynchronously
**calls** **Calls:**
---------
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message) callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
@ -265,7 +263,7 @@ class CGD(Async_Optimize):
""" """
opt(self, f, df, x0, callback=None, update_rule=FletcherReeves, opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6, messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
report_every=10, *args, **kwargs) report_every=10, \*args, \*\*kwargs)
Minimize f, calling callback every `report_every` iterations with following syntax: Minimize f, calling callback every `report_every` iterations with following syntax:
@ -276,11 +274,10 @@ class CGD(Async_Optimize):
f, and df will be called with f, and df will be called with
f(xi, *args, **kwargs) f(xi, \*args, \*\*kwargs)
df(xi, *args, **kwargs) df(xi, \*args, \*\*kwargs)
**returns** **returns**
---------
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message

View file

@ -29,7 +29,7 @@ class Optimizer():
""" """
def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, max_iters=1e3, def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, max_iters=1e3,
ftol=None, gtol=None, xtol=None): ftol=None, gtol=None, xtol=None, bfgs_factor=None):
self.opt_name = None self.opt_name = None
self.x_init = x_init self.x_init = x_init
self.messages = messages self.messages = messages
@ -39,6 +39,7 @@ class Optimizer():
self.status = None self.status = None
self.max_f_eval = int(max_f_eval) self.max_f_eval = int(max_f_eval)
self.max_iters = int(max_iters) self.max_iters = int(max_iters)
self.bfgs_factor = bfgs_factor
self.trace = None self.trace = None
self.time = "Not available" self.time = "Not available"
self.xtol = xtol self.xtol = xtol
@ -128,9 +129,11 @@ class opt_lbfgsb(Optimizer):
print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it" print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it"
if self.gtol is not None: if self.gtol is not None:
opt_dict['pgtol'] = self.gtol opt_dict['pgtol'] = self.gtol
if self.bfgs_factor is not None:
opt_dict['factr'] = self.bfgs_factor
opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint, opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
maxfun=self.max_f_eval, **opt_dict) maxfun=self.max_iters, **opt_dict)
self.x_opt = opt_result[0] self.x_opt = opt_result[0]
self.f_opt = f_fp(self.x_opt)[0] self.f_opt = f_fp(self.x_opt)[0]
self.funct_eval = opt_result[2]['funcalls'] self.funct_eval = opt_result[2]['funcalls']

View file

@ -10,11 +10,10 @@ class opt_SGD(Optimizer):
""" """
Optimize using stochastic gradient descent. Optimize using stochastic gradient descent.
*** Parameters *** :param Model: reference to the Model object
Model: reference to the Model object :param iterations: number of iterations
iterations: number of iterations :param learning_rate: learning rate
learning_rate: learning rate :param momentum: momentum
momentum: momentum
""" """

View file

@ -5,7 +5,6 @@ import numpy as np
from kern import kern from kern import kern
import parts import parts
def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False): def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
""" """
Construct an RBF kernel Construct an RBF kernel
@ -18,6 +17,7 @@ def rbf_inv(input_dim,variance=1., inv_lengthscale=None,ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD) part = parts.rbf_inv.RBFInv(input_dim,variance,inv_lengthscale,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -34,6 +34,7 @@ def rbf(input_dim,variance=1., lengthscale=None,ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD) part = parts.rbf.RBF(input_dim,variance,lengthscale,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -42,11 +43,13 @@ def linear(input_dim,variances=None,ARD=False):
""" """
Construct a linear kernel. Construct a linear kernel.
Arguments :param input_dim: dimensionality of the kernel, obligatory
--------- :type input_dim: int
input_dimD (int), obligatory :param variances:
variances (np.ndarray) :type variances: np.ndarray
ARD (boolean) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
""" """
part = parts.linear.Linear(input_dim,variances,ARD) part = parts.linear.Linear(input_dim,variances,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -65,37 +68,40 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
:type bias_variance: float :type bias_variance: float
:param ARD: Auto Relevance Determination (allows for ARD version of covariance) :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD) part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
def gibbs(input_dim,variance=1., mapping=None): def gibbs(input_dim,variance=1., mapping=None):
""" """
Gibbs and MacKay non-stationary covariance function. Gibbs and MacKay non-stationary covariance function.
.. math:: .. math::
r = sqrt((x_i - x_j)'*(x_i - x_j)) r = \\sqrt{((x_i - x_j)'*(x_i - x_j))}
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x'))) k(x_i, x_j) = \\sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')} Z = \\sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
Where :math:`l(x)` is a function giving the length scale as a function of space.
where :math:`l(x)` is a function giving the length scale as a function of space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997 This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of with input location. This leads to an additional term in front of
the kernel. the kernel.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default a multi-layer perceptron is used. The parameters are :math:`\\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default a multi-layer perceptron is used.
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\\sigma^2`
:type variance: float :type variance: float
:param mapping: the mapping that gives the lengthscale across the input space. :param mapping: the mapping that gives the lengthscale across the input space.
:type mapping: GPy.core.Mapping :type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: Kernpart object :rtype: Kernpart object
@ -103,6 +109,12 @@ def gibbs(input_dim,variance=1., mapping=None):
part = parts.gibbs.Gibbs(input_dim,variance,mapping) part = parts.gibbs.Gibbs(input_dim,variance,mapping)
return kern(input_dim, [part]) return kern(input_dim, [part])
def hetero(input_dim, mapping=None, transform=None):
    """
    Construct a kernel made of a single ``parts.hetero.Hetero`` part.

    All three arguments are forwarded unchanged to the ``Hetero`` constructor.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param mapping: forwarded to ``Hetero`` (presumably the mapping giving the input-dependent variance -- TODO confirm against parts/hetero.py)
    :param transform: forwarded to ``Hetero`` (semantics not visible from here -- TODO confirm)
    :rtype: kernel object

    """
    return kern(input_dim, [parts.hetero.Hetero(input_dim, mapping, transform)])
def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False): def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
""" """
Construct a polynomial kernel Construct a polynomial kernel
@ -119,6 +131,7 @@ def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2,
:type degree: int :type degree: int
:param ARD: Auto Relevance Determination (allows for ARD version of covariance) :param ARD: Auto Relevance Determination (allows for ARD version of covariance)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD) part = parts.poly.POLY(input_dim,variance,weight_variance,bias_variance,degree,ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -127,14 +140,43 @@ def white(input_dim,variance=1.):
""" """
Construct a white kernel. Construct a white kernel.
Arguments :param input_dim: dimensionality of the kernel, obligatory
--------- :type input_dim: int
input_dimD (int), obligatory :param variance: the variance of the kernel
variance (float) :type variance: float
""" """
part = parts.white.White(input_dim,variance) part = parts.white.White(input_dim,variance)
return kern(input_dim, [part]) return kern(input_dim, [part])
def eq_ode1(output_dim, W=None, rank=1, kappa=None, length_scale=1., decay=None, delay=None):
    """
    Covariance function for a first order differential equation driven by an exponentiated quadratic covariance.

    The outputs of this kernel have the form

    .. math::

       \\frac{\\text{d}y_j}{\\text{d}t} = \\sum_{i=1}^R w_{j,i} f_i(t-\\delta_j) + \\sqrt{\\kappa_j}g_j(t) - d_jy_j(t)

    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of output :math:`j` to latent function :math:`i`, :math:`d_j` is the decay rate of output :math:`j`, and :math:`f_i(t)` and :math:`g_j(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.

    :param output_dim: number of outputs driven by latent function.
    :type output_dim: int
    :param W: sensitivities of each output to the latent driving function.
    :type W: ndarray (output_dim x rank).
    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
    :type rank: int
    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
    :type kappa: array of length output_dim.
    :param length_scale: forwarded to the kernel part (presumably the length scale of the exponentiated quadratic covariance -- TODO confirm against parts/eq_ode1.py)
    :type length_scale: float
    :param decay: decay rates for the first order system.
    :type decay: array of length output_dim.
    :param delay: delay between latent force and output response.
    :type delay: array of length output_dim.
    :rtype: kernel object

    .. note:: see first order differential equation examples in GPy.examples.regression for some usage.

    """
    # NOTE: the backslashes in the math above are doubled on purpose. In a
    # non-raw string "\f" and "\t" are the form-feed and tab escapes, which
    # would silently corrupt "\frac" and "\text" in the rendered documentation.
    part = parts.eq_ode1.Eq_ode1(output_dim, W, rank, kappa, length_scale, decay, delay)
    # The kernel is always declared with two input dimensions, independently of
    # output_dim (presumably time plus an output index -- TODO confirm).
    return kern(2, [part])
def exponential(input_dim,variance=1., lengthscale=None, ARD=False): def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
""" """
Construct an exponential kernel Construct an exponential kernel
@ -147,6 +189,7 @@ def exponential(input_dim,variance=1., lengthscale=None, ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD) part = parts.exponential.Exponential(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -163,6 +206,7 @@ def Matern32(input_dim,variance=1., lengthscale=None, ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD) part = parts.Matern32.Matern32(input_dim,variance, lengthscale, ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -179,6 +223,7 @@ def Matern52(input_dim, variance=1., lengthscale=None, ARD=False):
:type lengthscale: float :type lengthscale: float
:param ARD: Auto Relevance Determination (one lengthscale per dimension) :param ARD: Auto Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean :type ARD: Boolean
""" """
part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD) part = parts.Matern52.Matern52(input_dim, variance, lengthscale, ARD)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -187,10 +232,11 @@ def bias(input_dim, variance=1.):
""" """
Construct a bias kernel. Construct a bias kernel.
Arguments :param input_dim: dimensionality of the kernel, obligatory
--------- :type input_dim: int
input_dim (int), obligatory :param variance: the variance of the kernel
variance (float) :type variance: float
""" """
part = parts.bias.Bias(input_dim, variance) part = parts.bias.Bias(input_dim, variance)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -198,10 +244,15 @@ def bias(input_dim, variance=1.):
def finite_dimensional(input_dim, F, G, variances=1., weights=None): def finite_dimensional(input_dim, F, G, variances=1., weights=None):
""" """
Construct a finite dimensional kernel. Construct a finite dimensional kernel.
input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions :param input_dim: the number of input dimensions
G: np.array with shape (n,n) - the Gram matrix associated to F :type input_dim: int
variances : np.ndarray with shape (n,) :param F: np.array of functions with shape (n,) - the n basis functions
:type F: np.array
:param G: np.array with shape (n,n) - the Gram matrix associated to F
:type G: np.array
:param variances: np.ndarray with shape (n,)
:type: np.ndarray
""" """
part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights) part = parts.finite_dimensional.FiniteDimensional(input_dim, F, G, variances, weights)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -214,6 +265,7 @@ def spline(input_dim, variance=1.):
:type input_dim: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
part = parts.spline.Spline(input_dim, variance) part = parts.spline.Spline(input_dim, variance)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -226,43 +278,78 @@ def Brownian(input_dim, variance=1.):
:type input_dim: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
part = parts.Brownian.Brownian(input_dim, variance) part = parts.Brownian.Brownian(input_dim, variance)
return kern(input_dim, [part]) return kern(input_dim, [part])
try: try:
import sympy as sp import sympy as sp
from sympykern import spkern
from sympy.parsing.sympy_parser import parse_expr
sympy_available = True sympy_available = True
except ImportError: except ImportError:
sympy_available = False sympy_available = False
if sympy_available: if sympy_available:
from parts.sympykern import spkern
from sympy.parsing.sympy_parser import parse_expr
from GPy.util.symbolic import sinc
def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.): def rbf_sympy(input_dim, ARD=False, variance=1., lengthscale=1.):
""" """
Radial Basis Function covariance. Radial Basis Function covariance.
""" """
X = [sp.var('x%i' % i) for i in range(input_dim)] X = [sp.var('x%i' % i) for i in range(input_dim)]
Z = [sp.var('z%i' % i) for i in range(input_dim)] Z = [sp.var('z%i' % i) for i in range(input_dim)]
rbf_variance = sp.var('rbf_variance',positive=True) variance = sp.var('variance',positive=True)
if ARD: if ARD:
rbf_lengthscales = [sp.var('rbf_lengthscale_%i' % i, positive=True) for i in range(input_dim)] lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2' % (i, i, i) for i in range(input_dim)]) dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
dist = parse_expr(dist_string) dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/2.) f = variance*sp.exp(-dist/2.)
else: else:
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True) lengthscale = sp.var('lengthscale',positive=True)
dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)]) dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
dist = parse_expr(dist_string) dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2)) f = variance*sp.exp(-dist/(2*lengthscale**2))
return kern(input_dim, [spkern(input_dim, f)]) return kern(input_dim, [spkern(input_dim, f, name='rbf_sympy')])
def sinc(input_dim, ARD=False, variance=1., lengthscale=1.):
    """
    Sinc covariance function.

    Builds the symbolic covariance ``variance*sinc(pi*r)``, where ``r`` is the
    Euclidean distance between ``x`` and ``z`` scaled by the lengthscale(s),
    and wraps it in a kernel with a single sympy part named ``'sinc'``.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param ARD: if True, each squared difference is divided by its own ``lengthscale_i**2``; otherwise one shared ``lengthscale`` divides the distance
    :type ARD: Boolean
    :param variance: numeric value is not used; the name is rebound to the sympy symbol ``variance``
    :param lengthscale: numeric value is not used; the covariance is expressed with sympy lengthscale symbols
    :rtype: kernel object

    """
    # This constructor is itself called ``sinc``. At module level its definition
    # replaces the symbolic ``sinc`` imported from GPy.util.symbolic, so a bare
    # ``sinc(...)`` call in here would recurse into this function with a sympy
    # expression as ``input_dim`` (the "argument of sinc is not a number"
    # failure). Import the symbolic function under a distinct local name.
    # NOTE(review): verify end-to-end with sympy installed.
    from GPy.util.symbolic import sinc as symbolic_sinc

    X = [sp.var('x%i' % i) for i in range(input_dim)]
    Z = [sp.var('z%i' % i) for i in range(input_dim)]
    variance = sp.var('variance',positive=True)
    if ARD:
        lengthscales = [sp.var('lengthscale_%i' % i, positive=True) for i in range(input_dim)]
        dist_string = ' + '.join(['(x%i-z%i)**2/lengthscale_%i**2' % (i, i, i) for i in range(input_dim)])
        dist = parse_expr(dist_string)
        f = variance*symbolic_sinc(sp.pi*sp.sqrt(dist))
    else:
        lengthscale = sp.var('lengthscale',positive=True)
        dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(input_dim)])
        dist = parse_expr(dist_string)
        f = variance*symbolic_sinc(sp.pi*sp.sqrt(dist)/lengthscale)

    return kern(input_dim, [spkern(input_dim, f, name='sinc')])
def sympykern(input_dim, k,name=None):
    """
    Build a kernel from a symbolic sympy expression; all the hard work is done by sympy.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param k: the covariance function
    :type k: a positive definite sympy function of x1, z1, x2, z2...
    :param name: forwarded to ``spkern`` as its third argument (presumably the name of the kernel part -- TODO confirm)

    To construct a new sympy kernel, you'll need to define:
     - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
     - that's it! we'll extract the variables from the function k.

    Note:
     - to handle multiple inputs, call them x1, z1, etc
     - to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
    """
    symbolic_part = spkern(input_dim, k, name)
    return kern(input_dim, [symbolic_part])
del sympy_available del sympy_available
def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi): def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
@ -279,6 +366,7 @@ def periodic_exponential(input_dim=1, variance=1., lengthscale=None, period=2 *
:type period: float :type period: float
:param n_freq: the number of frequencies considered for the periodic subspace :param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int :type n_freq: int
""" """
part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper) part = parts.periodic_exponential.PeriodicExponential(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -297,6 +385,7 @@ def periodic_Matern32(input_dim, variance=1., lengthscale=None, period=2 * np.pi
:type period: float :type period: float
:param n_freq: the number of frequencies considered for the periodic subspace :param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int :type n_freq: int
""" """
part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper) part = parts.periodic_Matern32.PeriodicMatern32(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -315,6 +404,7 @@ def periodic_Matern52(input_dim, variance=1., lengthscale=None, period=2 * np.pi
:type period: float :type period: float
:param n_freq: the number of frequencies considered for the periodic subspace :param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int :type n_freq: int
""" """
part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper) part = parts.periodic_Matern52.PeriodicMatern52(input_dim, variance, lengthscale, period, n_freq, lower, upper)
return kern(input_dim, [part]) return kern(input_dim, [part])
@ -328,6 +418,7 @@ def prod(k1,k2,tensor=False):
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean :type tensor: Boolean
:rtype: kernel object :rtype: kernel object
""" """
part = parts.prod.Prod(k1, k2, tensor) part = parts.prod.Prod(k1, k2, tensor)
return kern(part.input_dim, [part]) return kern(part.input_dim, [part])
@ -340,29 +431,32 @@ def symmetric(k):
k_.parts = [symmetric.Symmetric(p) for p in k.parts] k_.parts = [symmetric.Symmetric(p) for p in k.parts]
return k_ return k_
def coregionalise(output_dim, rank=1, W=None, kappa=None): def coregionalize(output_dim,rank=1, W=None, kappa=None):
""" """
Coregionalisation kernel. Coregionalization matrix B, of the form:
Used for computing covariance functions of the form
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
where
.. math:: .. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I} \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
:param output_dim: the number of output dimensions An intrinsic/linear coregionalization kernel of the form:
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
It is obtained as the tensor product between a kernel k(x,y) and B.
:param output_dim: the number of outputs to coregionalize
:type output_dim: int :type output_dim: int
:param rank: the rank of the coregionalisation matrix. :param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type rank: int :type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B. :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
:type W: ndarray :type W: numpy array of dimensionality (num_outputs, rank)
:param kappa: a diagonal term which allows the outputs to behave independently. :param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (output_dim,)
:rtype: kernel object :rtype: kernel object
.. Note: see coregionalisation examples in GPy.examples.regression for some usage.
""" """
p = parts.coregionalise.Coregionalise(output_dim,rank,W,kappa) p = parts.coregionalize.Coregionalize(output_dim,rank,W,kappa)
return kern(1,[p]) return kern(1,[p])
@ -415,9 +509,58 @@ def independent_outputs(k):
def hierarchical(k): def hierarchical(k):
""" """
TODO THis can't be right! Construct a kernel with independent outputs from an existing kernel TODO This can't be right! Construct a kernel with independent outputs from an existing kernel
""" """
# for sl in k.input_slices: # for sl in k.input_slices:
# assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)" # assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
_parts = [parts.hierarchical.Hierarchical(k.parts)] _parts = [parts.hierarchical.Hierarchical(k.parts)]
return kern(k.input_dim+len(k.parts),_parts) return kern(k.input_dim+len(k.parts),_parts)
def build_lcm(input_dim, output_dim, kernel_list=None, rank=1, W=None, kappa=None):
    """
    Builds a kernel of a linear coregionalization model.

    Every kernel in ``kernel_list`` is combined, through the kernel's ``**``
    operator, with a copy of its own freshly built coregionalize kernel, and
    the resulting terms are summed.

    :param input_dim: Input dimensionality
    :type input_dim: int
    :param output_dim: Number of outputs
    :type output_dim: int
    :param kernel_list: List of coregionalized kernels, each element in the list will be multiplied by a different coregionalization matrix
    :type kernel_list: list of GPy kernels
    :param rank: rank handed to every ``coregionalize`` call (number of columns of the W matrix)
    :type rank: integer
    :param W: handed unchanged to every ``coregionalize`` call
    :param kappa: handed unchanged to every ``coregionalize`` call
    :rtype: kernel object
    :raises IndexError: if ``kernel_list`` is empty or omitted

    .. note:: the kernels in ``kernel_list`` are modified in place: their ``input_dim`` is overwritten to fit ``input_dim``, and a warning is issued when that happens.

    """
    # warnings is not among the imports visible at the top of this module,
    # so bring it into scope here rather than rely on it.
    import warnings

    # None replaces the former mutable ``[]`` default; behaviour for callers is
    # unchanged (an empty list still fails on kernel_list[0] below).
    if kernel_list is None:
        kernel_list = []

    for k in kernel_list:
        # ``!=`` instead of the Python-2-only ``<>`` spelling of the same test.
        if k.input_dim != input_dim:
            k.input_dim = input_dim
            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

    k_coreg = coregionalize(output_dim, rank, W, kappa)
    kernel = kernel_list[0] ** k_coreg.copy()

    for k in kernel_list[1:]:
        # A new coregionalize kernel per term, so each term gets its own matrix.
        k_coreg = coregionalize(output_dim, rank, W, kappa)
        kernel += k ** k_coreg.copy()

    return kernel
def ODE_1(input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
    """
    Kernel resulting from a first order ODE with OU driving GP.

    :param input_dim: the number of input dimensions, has to be equal to one (not checked by this constructor)
    :type input_dim: int
    :param varianceU: variance of the driving GP
    :type varianceU: float
    :param varianceY: 'variance' of the transfer function
    :type varianceY: float
    :param lengthscaleU: lengthscale of the driving GP
    :type lengthscaleU: float
    :param lengthscaleY: 'lengthscale' of the transfer function
    :type lengthscaleY: float
    :rtype: kernel object

    """
    ode_part = parts.ODE_1.ODE_1(
        input_dim, varianceU, varianceY, lengthscaleU, lengthscaleY)
    return kern(input_dim, [ode_part])

View file

@ -1,6 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from ..core.parameterized import Parameterized from ..core.parameterized import Parameterized
@ -12,7 +13,9 @@ from matplotlib.transforms import offset_copy
class kern(Parameterized): class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None): def __init__(self, input_dim, parts=[], input_slices=None):
""" """
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where. This is the main kernel class for GPy. It handles multiple
(additive) kernel functions, and keeps track of various things
like which parameters live where.
The technical code for kernels is divided into _parts_ (see The technical code for kernels is divided into _parts_ (see
e.g. rbf.py). This object contains a list of parts, which are e.g. rbf.py). This object contains a list of parts, which are
@ -33,6 +36,11 @@ class kern(Parameterized):
self.input_dim = input_dim self.input_dim = input_dim
part_names = [k.name for k in self.parts]
self.name=''
for name in part_names:
self.name += name + '+'
self.name = self.name[:-1]
# deal with input_slices # deal with input_slices
if input_slices is None: if input_slices is None:
self.input_slices = [slice(None) for p in self.parts] self.input_slices = [slice(None) for p in self.parts]
@ -71,13 +79,15 @@ class kern(Parameterized):
def plot_ARD(self, fignum=None, ax=None, title='', legend=False): def plot_ARD(self, fignum=None, ax=None, title='', legend=False):
"""If an ARD kernel is present, it bar-plots the ARD parameters, """If an ARD kernel is present, it bar-plots the ARD parameters.
:param fignum: figure number of the plot :param fignum: figure number of the plot
:param ax: matplotlib axis to plot on :param ax: matplotlib axis to plot on
:param title: :param title:
title of the plot, title of the plot,
pass '' to not print a title pass '' to not print a title
pass None for a generic title pass None for a generic title
""" """
if ax is None: if ax is None:
fig = pb.figure(fignum) fig = pb.figure(fignum)
@ -168,8 +178,10 @@ class kern(Parameterized):
def add(self, other, tensor=False): def add(self, other, tensor=False):
""" """
Add another kernel to this one. Both kernels are defined on the same _space_ Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
""" """
if tensor: if tensor:
D = self.input_dim + other.input_dim D = self.input_dim + other.input_dim
@ -211,11 +223,13 @@ class kern(Parameterized):
def prod(self, other, tensor=False): def prod(self, other, tensor=False):
""" """
multiply two kernels (either on the same space, or on the tensor product of the input space). Multiply two kernels (either on the same space, or on the tensor product of the input space).
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false). :param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool :type tensor: bool
""" """
K1 = self.copy() K1 = self.copy()
K2 = other.copy() K2 = other.copy()
@ -314,6 +328,7 @@ class kern(Parameterized):
:type X: np.ndarray (num_samples x input_dim) :type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X) :param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim) :type X2: np.ndarray (num_inducing x input_dim)
""" """
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
target = np.zeros(self.num_params) target = np.zeros(self.num_params)
@ -333,8 +348,7 @@ class kern(Parameterized):
:type X: np.ndarray (num_samples x input_dim) :type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X) :param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)""" :type X2: np.ndarray (num_inducing x input_dim)"""
if X2 is None:
X2 = X
target = np.zeros_like(X) target = np.zeros_like(X)
if X2 is None: if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] [p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
@ -408,6 +422,7 @@ class kern(Parameterized):
:param Z: np.ndarray of inducing inputs (num_inducing x input_dim) :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each num_samples x input_dim) :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
:returns psi2: np.ndarray (num_samples,num_inducing,num_inducing) :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)
""" """
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
@ -563,7 +578,7 @@ class Kern_check_model(Model):
def is_positive_definite(self): def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0] v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<0): if any(v<-10*sys.float_info.epsilon):
return False return False
else: else:
return True return True
@ -652,18 +667,102 @@ def kern_test(kern, X=None, X2=None, verbose=False):
:type X: ndarray :type X: ndarray
:param X2: X2 input values to test the covariance function. :param X2: X2 input values to test the covariance function.
:type X2: ndarray :type X2: ndarray
""" """
pass_checks = True
if X==None: if X==None:
X = np.random.randn(10, kern.input_dim) X = np.random.randn(10, kern.input_dim)
if X2==None: if X2==None:
X2 = np.random.randn(20, kern.input_dim) X2 = np.random.randn(20, kern.input_dim)
result = [Kern_check_model(kern, X=X).is_positive_definite(), if verbose:
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose), print("Checking covariance function is positive definite.")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose), result = Kern_check_model(kern, X=X).is_positive_definite()
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose), if result and verbose:
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose), print("Check passed.")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)] if not result:
# Need to check print("Positive definite check failed for " + kern.name + " covariance function.")
#Kern_check_dK_dX(kern, X, X2=None).checkgrad(verbose=verbose)] pass_checks = False
# but currently I think these aren't implemented. return False
return np.all(result)
if verbose:
print("Checking gradients of K(X, X) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt theta.")
result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of Kdiag(X) wrt X.")
try:
result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
pass_checks = False
return False
return pass_checks

View file

@ -98,7 +98,11 @@ class Matern32(Kernpart):
def dK_dX(self, dL_dK, X, X2, target): def dK_dX(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = 2*(X[:, None, :] - X[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
else:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2)) dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))

View file

@ -98,7 +98,10 @@ class Matern52(Kernpart):
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = 2*(X[:,None,:]-X[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
else:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))

161
GPy/kern/parts/ODE_1.py Normal file
View file

@ -0,0 +1,161 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class ODE_1(Kernpart):
"""
Kernel resulting from a first order ODE with OU driving GP
:param input_dim: the number of input dimension, has to be equal to one
:type input_dim: int
:param varianceU: variance of the driving GP
:type varianceU: float
:param lengthscaleU: lengthscale of the driving GP (sqrt(3)/lengthscaleU)
:type lengthscaleU: float
:param varianceY: 'variance' of the transfer function
:type varianceY: float
:param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
:type lengthscaleY: float
:rtype: kernel object
"""
def __init__(self, input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
    """Initialise the kernel part and pack its four parameters.

    :param input_dim: number of input dimensions, must be 1
    :type input_dim: int
    :param varianceU: variance of the driving GP
    :type varianceU: float
    :param varianceY: 'variance' of the transfer function
    :type varianceY: float
    :param lengthscaleU: lengthscale of the driving GP (single element, defaults to 1)
    :param lengthscaleY: 'lengthscale' of the transfer function (single element, defaults to 1)
    """
    assert input_dim==1, "Only defined for input_dim = 1"
    self.input_dim = input_dim
    self.num_params = 4
    self.name = 'ODE_1'

    # Each lengthscale falls back to one; a supplied value must hold a single element.
    if lengthscaleU is None:
        lengthscaleU = np.ones(1)
    else:
        lengthscaleU = np.asarray(lengthscaleU)
        assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"

    if lengthscaleY is None:
        lengthscaleY = np.ones(1)
    else:
        lengthscaleY = np.asarray(lengthscaleY)
        assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"

    # Parameter order: varianceU, varianceY, lengthscaleU, lengthscaleY.
    initial = np.hstack((varianceU, varianceY, lengthscaleU, lengthscaleY))
    self._set_params(initial)
def _get_params(self):
    """Return the parameters stacked as (varianceU, varianceY, lengthscaleU, lengthscaleY)."""
    ordered = (self.varianceU, self.varianceY, self.lengthscaleU, self.lengthscaleY)
    return np.hstack(ordered)
def _set_params(self, x):
    """Unpack the parameter vector x into the four kernel attributes.

    :param x: parameters ordered as varianceU, varianceY, lengthscaleU, lengthscaleY
    """
    assert x.size == self.num_params
    (self.varianceU, self.varianceY,
     self.lengthscaleU, self.lengthscaleY) = x[0], x[1], x[2], x[3]
def _get_param_names(self):
    """Return the parameter names in the order used by the parameter vector."""
    names = []
    for quantity in ('variance', 'lengthscale'):
        for process in ('U', 'Y'):
            names.append(quantity + process)
    return names
def K(self, X, X2, target):
    """Add the covariance between X and X2 to target, in place.

    :param X: first set of inputs (presumably a single column; input_dim is asserted to be 1)
    :param X2: second set of inputs, or None to use X
    :param target: array the covariance is accumulated into
    """
    other = X if X2 is None else X2
    dist = np.abs(X - other.T)

    # Rates used by the closed form: reciprocal lengthscale for Y, sqrt(3)/lengthscale for U.
    ly = 1/self.lengthscaleY
    lu = np.sqrt(3)/self.lengthscaleU

    decay_y = np.exp(-ly*dist)
    decay_u = np.exp(-lu*dist)

    k1 = decay_y*(2*lu+ly)/(lu+ly)**2
    k2 = (decay_u*(ly-2*lu+lu*ly*dist-lu**2*dist) + decay_y*(2*lu-ly)) / (ly-lu)**2
    k3 = decay_u * ((1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2)

    target += self.varianceU*self.varianceY*(k1+k2+k3)
def Kdiag(self, X, target):
    """Add the (constant) diagonal of the covariance to target, in place.

    :param X: inputs; not read, the value added is the same for every entry of target
    :param target: array the diagonal is accumulated into
    """
    ly = 1/self.lengthscaleY
    lu = np.sqrt(3)/self.lengthscaleU
    rate_sum = lu + ly

    # These are the terms of K evaluated at zero distance.
    k1 = (2*lu+ly)/rate_sum**2
    k2 = (ly-2*lu + 2*lu-ly) / (ly-lu)**2
    k3 = 1/rate_sum + lu/rate_sum**2

    target += self.varianceU*self.varianceY*(k1+k2+k3)
def dK_dtheta(self, dL_dK, X, X2, target):
    """Accumulate the gradient of the objective with respect to the four parameters.

    :param dL_dK: gradient of the objective with respect to the covariance matrix
    :param X: first set of inputs
    :param X2: second set of inputs, or None to use X
    :param target: length-4 array incremented in place, ordered as
        (varianceU, varianceY, lengthscaleU, lengthscaleY)
    """
    if X2 is None: X2 = X
    dist = np.abs(X - X2.T)
    # Same rate parameterisation as in K.
    ly=1/self.lengthscaleY
    lu=np.sqrt(3)/self.lengthscaleU
    #ly=self.lengthscaleY
    #lu=self.lengthscaleU
    # dk*theta1: presumably the derivatives of k1, k2, k3 with respect to lu
    # (they are chained with d(lu)/d(lengthscaleU) below) -- TODO confirm algebra.
    dk1theta1 = np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
    #c=np.sqrt(3)
    #t1=c/lu
    #t2=1/ly
    #dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3 )
    dk2theta1 = 1*(
    np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
    +np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
    +np.exp(-dist*ly)*2*(ly-lu)**(-2)
    +np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
    )
    dk3theta1 = np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
    dktheta1 = self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
    # dk*theta2: presumably the derivatives of k1, k2, k3 with respect to ly
    # (chained with d(ly)/d(lengthscaleY) below) -- TODO confirm algebra.
    dk1theta2 = np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
    dk2theta2 = 1*(
    np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
    +np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
    )
    dk3theta2 = np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
    dktheta2 = self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
    # The unscaled kernel (same expressions as in K) gives the variance gradients,
    # because K is the product varianceU*varianceY*(k1+k2+k3).
    k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
    k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
    k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
    dkdvar = k1+k2+k3
    target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
    target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
    # Chain rule through lu = sqrt(3)/lengthscaleU and ly = 1/lengthscaleY.
    target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
    target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
# def dKdiag_dtheta(self, dL_dKdiag, X, target):
# """derivative of the diagonal of the covariance matrix with respect to the parameters."""
# # NB: derivative of diagonal elements wrt lengthscale is 0
# target[0] += np.sum(dL_dKdiag)
# def dK_dX(self, dL_dK, X, X2, target):
# """derivative of the covariance matrix with respect to X."""
# if X2 is None: X2 = X
# dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
# ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
# dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
# target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
# def dKdiag_dX(self, dL_dKdiag, X, target):
# pass

View file

@ -1,15 +1,19 @@
import bias import bias
import Brownian import Brownian
import coregionalise import coregionalize
import exponential import exponential
import eq_ode1
import finite_dimensional import finite_dimensional
import fixed import fixed
import gibbs import gibbs
import hetero
import hierarchical
import independent_outputs import independent_outputs
import linear import linear
import Matern32 import Matern32
import Matern52 import Matern52
import mlp import mlp
import ODE_1
import periodic_exponential import periodic_exponential
import periodic_Matern32 import periodic_Matern32
import periodic_Matern52 import periodic_Matern52
@ -19,8 +23,7 @@ import prod
import rational_quadratic import rational_quadratic
import rbfcos import rbfcos
import rbf import rbf
import rbf_inv
import spline import spline
import symmetric import symmetric
import white import white
import hierarchical
import rbf_inv

View file

@ -7,33 +7,40 @@ from GPy.util.linalg import mdot, pdinv
import pdb import pdb
from scipy import weave from scipy import weave
class Coregionalise(Kernpart): class Coregionalize(Kernpart):
""" """
Coregionalisation kernel. Covariance function for intrinsic/linear coregionalization models
Used for computing covariance functions of the form This covariance has the form:
.. math:: .. math::
k_2(x, y)=B k(x, y) \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(kappa)
where
.. math::
B = WW^\top + diag(kappa)
:param output_dim: the number of output dimensions An intrinsic/linear coregionalization covariance function of the form:
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
it is obtained as the tensor product between a covariance function
k(x,y) and B.
:param output_dim: number of outputs to coregionalize
:type output_dim: int :type output_dim: int
:param rank: the rank of the coregionalisation matrix. :param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type rank: int :type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B. :param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
:type W: ndarray :type W: numpy array of dimensionality (num_outpus, W_columns)
:param kappa: a diagonal term which allows the outputs to behave independently. :param kappa: a vector which allows the outputs to behave independently
:rtype: kernel object :type kappa: numpy array of dimensionality (output_dim,)
.. Note: see coregionalisation examples in GPy.examples.regression for some usage. .. note: see coregionalization examples in GPy.examples.regression for some usage.
""" """
def __init__(self,output_dim,rank=1, W=None, kappa=None): def __init__(self, output_dim, rank=1, W=None, kappa=None):
self.input_dim = 1 self.input_dim = 1
self.name = 'coregion' self.name = 'coregion'
self.output_dim = output_dim self.output_dim = output_dim
self.rank = rank self.rank = rank
if self.rank>output_dim-1:
print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
if W is None: if W is None:
self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank) self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
else: else:
@ -154,7 +161,5 @@ class Coregionalise(Kernpart):
target += np.hstack([dW.flatten(),dkappa]) target += np.hstack([dW.flatten(),dkappa])
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
#NOTE In this case, pass is equivalent to returning zero.
pass pass

556
GPy/kern/parts/eq_ode1.py Normal file
View file

@ -0,0 +1,556 @@
# Copyright (c) 2013, GPy Authors, see AUTHORS.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util.ln_diff_erfs import ln_diff_erfs
import pdb
from scipy import weave
class Eq_ode1(Kernpart):
"""
Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
The outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None):
    """Set up the kernel part and pack its parameters.

    :param output_dim: number of outputs handled by the kernel
    :type output_dim: int
    :param W: sensitivities, shape (output_dim, rank); drawn at random when None
    :param rank: number of columns of W
    :type rank: int
    :param kappa: optional diagonal term added to W W^T, shape (output_dim,)
    :param lengthscale: length scale of the exponentiated quadratic
    :type lengthscale: float
    :param decay: decay rates, shape (output_dim-1,); defaults to ones
    :param delay: optional delays, shape (output_dim-1,)
    """
    self.rank = rank
    self.input_dim = 1
    self.name = 'eq_ode1'
    self.output_dim = output_dim
    self.lengthscale = lengthscale

    # W entries, one length scale, and output_dim-1 decays are always present;
    # kappa and delay contribute parameters only when supplied.
    self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
    if kappa is not None:
        self.num_params += self.output_dim
    if delay is not None:
        assert delay.shape==(self.output_dim-1,)
        self.num_params += self.output_dim-1

    if W is None:
        self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
    else:
        assert W.shape==(self.output_dim,self.rank)
        self.W = W

    # A user-supplied decay used to be dropped, leaving self.decay undefined
    # and making _get_params fail; store it after checking its shape.
    if decay is None:
        self.decay = np.ones(self.output_dim-1)
    else:
        decay = np.asarray(decay)
        assert decay.shape==(self.output_dim-1,)
        self.decay = decay

    if kappa is not None:
        assert kappa.shape==(self.output_dim,)
    self.kappa = kappa
    self.delay = delay

    self.is_normalized = True
    self.is_stationary = False
    self.gaussian_initial = False

    # Round-trip through the parameter vector so derived quantities are set.
    self._set_params(self._get_params())
def _get_params(self):
    """Return all parameters as one flat vector.

    Order: W (flattened), kappa (if set), decay, delay (if set), lengthscale.
    """
    kappa_part = [] if self.kappa is None else [self.kappa]
    delay_part = [] if self.delay is None else [self.delay]
    pieces = [self.W.flatten()] + kappa_part + [self.decay] + delay_part + [self.lengthscale]
    return np.hstack(pieces)
def _set_params(self,x):
    """Unpack the flat parameter vector x and refresh the derived quantities B and sigma.

    :param x: vector ordered as W (flattened), kappa (if set), decay, delay (if set), lengthscale
    """
    assert x.size == self.num_params
    n_out = self.output_dim

    stop = n_out*self.rank
    self.W = x[:stop].reshape(n_out, self.rank)
    self.B = np.dot(self.W, self.W.T)
    begin = stop

    if self.kappa is not None:
        stop = begin + n_out
        self.kappa = x[begin:stop]
        self.B += np.diag(self.kappa)
        begin = stop

    stop = begin + n_out - 1
    self.decay = x[begin:stop]
    begin = stop

    if self.delay is not None:
        stop = begin + n_out - 1
        self.delay = x[begin:stop]
        begin = stop

    self.lengthscale = x[begin]
    self.sigma = np.sqrt(2)*self.lengthscale
def _get_param_names(self):
    """Return the parameter names, in the same order as the parameter vector.

    Order: W entries (row by row), kappa (if set), decay, delay (if set), lengthscale.
    """
    names = ['W%i_%i' % (i, j) for i in range(self.output_dim) for j in range(self.rank)]
    if self.kappa is not None:
        names += ['kappa_%i' % i for i in range(self.output_dim)]

    # Decays and delays have output_dim-1 entries each, numbered from 1.
    numbered = range(1, self.output_dim)
    names += ['decay_%i' % i for i in numbered]
    if self.delay is not None:
        # The former `1+range(...)` added an int to a range/list and raised TypeError.
        names += ['delay_%i' % i for i in numbered]

    names.append('lengthscale')
    return names
def K(self,X,X2,target):
    """Add the covariance between X and X2 to target, in place.

    :param X: input matrix with a time column and an output-index column
    :param X2: second input matrix, or None
    :param target: array the covariance is accumulated into
    :raises ValueError: if X has more than two columns
    """
    too_wide = X.shape[1] > 2
    if too_wide:
        raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')

    self._K_computations(X, X2)
    np.add(target, self._scale*self._K_dvar, target)

    if not self.gaussian_initial:
        return

    # Add covariance associated with initial condition.
    t1_mat = self._t[self._rorder, None]
    t2_mat = self._t2[None, self._rorder2]
    target += self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
def Kdiag(self,index,target):
    """Diagonal of the covariance: currently a no-op that leaves target unchanged."""
    #target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
    pass
def dK_dtheta(self,dL_dK,X,X2,target):
    """Accumulate parameter gradients into target.

    Caches the times, indices and dL_dK extracted from the inputs, then
    delegates to _dK_ode_dtheta, which updates target in place.
    """
    # First extract times and indices.
    self._extract_t_indices(X, X2, dL_dK=dL_dK)
    self._dK_ode_dtheta(target)
def _dK_ode_dtheta(self, target):
    """Accumulate the length scale gradient coming from the ODE-ODE block.

    Reads the cached self._t, self._index, self._t2, self._index2 and
    self._dL_dK, and increments the last entry of target in place. Returns
    without touching target when there are no ODE outputs on either side.

    NOTE(review): only the length scale entry is updated here, and the
    output scaling that K applies (self._scale) is not used -- confirm
    whether that is intended while the remaining gradients are unported.
    """
    ode_rows = self._index>0
    t_ode = self._t[ode_rows]
    index_ode = self._index[ode_rows]-1
    dL_dK_ode = self._dL_dK[ode_rows, :]
    if self._t2 is None:
        if t_ode.size==0:
            return
        t2_ode = t_ode
        index2_ode = index_ode
        dL_dK_ode = dL_dK_ode[:, ode_rows]
    else:
        ode_cols = self._index2>0
        t2_ode = self._t2[ode_cols]
        dL_dK_ode = dL_dK_ode[:, ode_cols]
        if t_ode.size==0 or t2_ode.size==0:
            return
        index2_ode = self._index2[ode_cols]-1

    # _compute_H stores the sigma derivative on self._dh_dsigma as a side effect.
    self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary, update_derivatives=True)
    dh1_dsigma = self._dh_dsigma
    if self._t2 is None:
        # Same inputs on both sides, so the second evaluation would be identical.
        dh2_dsigma = dh1_dsigma
    else:
        self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary, update_derivatives=True)
        dh2_dsigma = self._dh_dsigma

    # K_ode is 0.5*(h1 + h2.T), so its sigma derivative is 0.5*(dh1 + dh2.T).
    # Built out of place so no array is added in place to its own transpose.
    self._dK_dsigma = dh1_dsigma + dh2_dsigma.T

    # C1 = self.sensitivity
    # C2 = self.sensitivity
    # K = 0.5 * (h1 + h2.T)
    # var2 = C1*C2
    # if self.is_normalized:
    # dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + sum(sum(dL_dK.*dh2_dD1.T)))*0.5*var2
    # dk_dD2 = (sum(sum(dL_dK.*dh1_dD2)) + sum(sum(dL_dK.*dh2_dD2.T)))*0.5*var2
    # dk_dsigma = 0.5 * var2 * sum(sum(dL_dK.*dK_dsigma))
    # dk_dC1 = C2 * sum(sum(dL_dK.*K))
    # dk_dC2 = C1 * sum(sum(dL_dK.*K))
    # else:
    # K = np.sqrt(np.pi) * K
    # dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + * sum(sum(dL_dK.*K))
    # dk_dC2 = self.sigma * C1 * sum(sum(dL_dK.*K))
    # dk_dSim1Variance = dk_dC1

    # Last element is the length scale. sigma = sqrt(2)*lengthscale, so the
    # factor 0.5*sqrt(2) from the chain rule collapses to 1/sqrt(2).
    # (A discarded expression that read the never-assigned self._dh_ddelay
    # used to sit here and raised AttributeError on every call.)
    target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum()

    # # only pass the gradient with respect to the inverse width to one
    # # of the gradient vectors ... otherwise it is counted twice.
    # g1 = real([dk_dD1 dk_dinvWidth dk_dSim1Variance])
    # g2 = real([dk_dD2 0 dk_dSim2Variance])
    # return g1, g2"""
def dKdiag_dtheta(self,dL_dKdiag,index,target):
    """Gradient of the covariance diagonal w.r.t. the parameters: currently a no-op that leaves target unchanged."""
    pass
def dK_dX(self,dL_dK,X,X2,target):
    """Gradient of the covariance w.r.t. the inputs: currently a no-op that leaves target unchanged."""
    pass
def _extract_t_indices(self, X, X2=None, dL_dK=None):
    """Extract times and output indices from the input matrices and cache them.

    Times are sorted by output index for convenience of computation. The
    sort order is stored in self._order and self._order2; self._rorder and
    self._rorder2 map the sorted arrays back to the original row order.

    :param X: input matrix with two columns, times then output indices
    :param X2: optional second input matrix with the same layout
    :param dL_dK: optional gradient matrix; when given, a copy reordered to
        match the sorted rows and columns is stored in self._dL_dK
    :raises ValueError: if X or X2 does not have exactly two columns
    """
    # TODO: some fast checking here to see if this needs recomputing?
    # Validate before touching any cached state.
    if not X.shape[1] == 2:
        raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
    if X2 is not None and not X2.shape[1] == 2:
        raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')

    # The builtin int replaces the np.int alias, which newer NumPy no longer provides.
    index = np.asarray(X[:, 1], dtype=int)

    # Sort indices so that outputs are in blocks for computational
    # convenience.
    self._order = index.argsort()
    self._index = index[self._order]
    self._t = X[:, 0][self._order]
    self._rorder = self._order.argsort() # rorder is for reversing the order

    if X2 is None:
        self._t2 = None
        self._index2 = None
        self._order2 = self._order
        self._rorder2 = self._rorder
    else:
        index2 = np.asarray(X2[:, 1], dtype=int)
        self._order2 = index2.argsort()
        self._index2 = index2[self._order2]
        self._t2 = X2[:, 0][self._order2]
        self._rorder2 = self._order2.argsort() # rorder2 is for reversing order

    if dL_dK is not None:
        self._dL_dK = dL_dK[self._order, :][:, self._order2]
def _K_computations(self, X, X2):
    """Perform main body of computations for the ode1 covariance function.

    Fills self._K_dvar with the unscaled covariance and self._scale with the
    matching entries of self.B, both laid out in the row order of X and the
    column order of X2 (or X when X2 is None).

    :param X: input matrix (times and output indices)
    :param X2: second input matrix, or None for the symmetric case
    """
    # First extract times and indices.
    self._extract_t_indices(X, X2)

    # Compute the four blocks in index-sorted order.
    self._K_compute_eq()
    self._K_compute_ode_eq()
    if X2 is None:
        self._K_eq_ode = self._K_ode_eq.T
    else:
        self._K_compute_ode_eq(transpose=True)
    self._K_compute_ode()

    # Assemble the sorted block matrix, then undo the sort on rows and columns.
    sorted_K = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
                          np.hstack((self._K_ode_eq, self._K_ode))))
    self._K_dvar = sorted_K[self._rorder, :][:, self._rorder2]

    # Matrix giving scales of each output. It is multiplied element-wise
    # with _K_dvar, so it must follow the original (unsorted) ordering too.
    # Plain NumPy indexing replaces the inline C loops, whose rectangular
    # variant addressed the row-major buffer as if it were column-major.
    row_index = self._index[self._rorder]
    if X2 is None:
        col_index = row_index
    else:
        col_index = self._index2[self._rorder2]
    self._scale = self.B[row_index[:, None], col_index[None, :]]
def _K_compute_eq(self):
    """Compute the covariance between latent-function points (output index 0).

    Stores the result in self._K_eq and the squared time differences in
    self._dist2. When either side has no latent points, self._K_eq is an
    empty array of matching shape and self._dist2 is left untouched.
    """
    t_eq = self._t[self._index==0]
    t2_eq = t_eq if self._t2 is None else self._t2[self._index2==0]

    if t_eq.size==0 or t2_eq.size==0:
        self._K_eq = np.zeros((t_eq.size, t2_eq.size))
        return

    self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
    cov = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
    if self.is_normalized:
        cov /= (np.sqrt(2*np.pi)*self.lengthscale)
    self._K_eq = cov
def _K_compute_ode_eq(self, transpose=False):
    """Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.

    :param transpose: if False the ODE outputs are on the rows (taken from
        self._t) and the exponentiated quadratic on the columns (taken from
        self._t2, or self._t when self._t2 is None); the result is stored in
        self._K_ode_eq. If True the exponentiated quadratic is on the rows
        (taken from self._t) and the ODE outputs on the columns; the result
        is stored in self._K_eq_ode (default=False).
    :type transpose: bool
    """
    if self._t2 is not None:
        if transpose:
            t_eq = self._t[self._index==0]
            t_ode = self._t2[self._index2>0]
            index_ode = self._index2[self._index2>0]-1
        else:
            t_eq = self._t2[self._index2==0]
            t_ode = self._t[self._index>0]
            index_ode = self._index[self._index>0]-1
    else:
        t_eq = self._t[self._index==0]
        t_ode = self._t[self._index>0]
        index_ode = self._index[self._index>0]-1

    # Nothing to compute when either side is empty: store an empty block.
    if t_ode.size==0 or t_eq.size==0:
        if transpose:
            self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
        else:
            self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
        return

    t_ode_mat = t_ode[:, None]
    t_eq_mat = t_eq[None, :]
    if self.delay is not None:
        t_ode_mat = t_ode_mat - self.delay[index_ode, None]
    diff_t = (t_ode_mat - t_eq_mat)

    inv_sigma_diff_t = 1./self.sigma*diff_t
    decay_vals = self.decay[index_ode][:, None]
    half_sigma_d_i = 0.5*self.sigma*decay_vals

    if self.is_stationary:
        # np.inf replaces a bare `inf`, which is not a defined name in this module.
        ln_part, signs = ln_diff_erfs(np.inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
    else:
        ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
    sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)

    sK *= 0.5

    if not self.is_normalized:
        sK *= np.sqrt(np.pi)*self.sigma

    if transpose:
        self._K_eq_ode = sK.T
    else:
        self._K_ode_eq = sK
def _K_compute_ode(self):
    """Compute covariances between outputs of the ODE models and store them in self._K_ode."""
    t_ode = self._t[self._index>0]
    index_ode = self._index[self._index>0]-1

    symmetric = self._t2 is None
    if symmetric:
        t2_ode, index2_ode = t_ode, index_ode
    else:
        t2_ode = self._t2[self._index2>0]
        index2_ode = self._index2[self._index2>0]-1

    # No ODE outputs on one of the sides: store an empty block.
    if t_ode.size==0 or t2_ode.size==0:
        self._K_ode = np.zeros((t_ode.size, t2_ode.size))
        return

    h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)
    if symmetric:
        # When index is identical
        h2 = h
    else:
        h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
    self._K_ode = 0.5 * (h + h2.T)

    if not self.is_normalized:
        self._K_ode *= np.sqrt(np.pi)*self.sigma
def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False):
    """Helper function for computing H for the diagonal only.

    Not implemented yet: the body below is an inert string holding a partial
    port of the computation, so calling this does nothing and returns None.

    :param t: time input.
    :type t: array
    :param index: output indices
    :type index: array of int.
    :param update_derivatives: whether or not to update the derivative portions (default False).
    :type update_derivatives: bool
    :param stationary: whether to compute the stationary version of the covariance (default False).
    :type stationary: bool"""
    # The string below is never executed; it is kept as reference material for a future implementation.
    """if delta_i~=delta_j:
[h, dh_dD_i, dh_dD_j, dh_dsigma] = np.diag(simComputeH(t, index, t, index, update_derivatives=True, stationary=self.is_stationary))
else:
Decay = self.decay[index]
if self.delay is not None:
t = t - self.delay[index]
t_squared = t*t
half_sigma_decay = 0.5*self.sigma*Decay
[ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
half_sigma_decay)
[ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
half_sigma_decay - t/self.sigma)
h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
+ ln_part_1
- log(Decay + D_j))
- sign2*np.exp(half_sigma_decay*half_sigma_decay
- (Decay + D_j)*t
+ ln_part_2
- log(Decay + D_j)))
sigma2 = self.sigma*self.sigma
if update_derivatives:
dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
+ t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
)
+ self.sigma/np.sqrt(np.pi)*
(-1 + np.exp(-t_squared/sigma2-Decay*t)
+ np.exp(-t_squared/sigma2-D_j*t)
- np.exp(-(Decay + D_j)*t)))
dh_dD_i = (dh_dD_i/(Decay+D_j)).real
dh_dD_j = (t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
)
-h)
dh_dD_j = (dh_dD_j/(Decay + D_j)).real
dh_dsigma = 0.5*Decay*Decay*self.sigma*h \
+ 2/(np.sqrt(np.pi)*(Decay+D_j))\
*((-Decay/2) \
+ (-t/sigma2+Decay/2)*np.exp(-t_squared/sigma2 - Decay*t) \
- (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
- Decay/2*np.exp(-(Decay+D_j)*t))"""
    pass
def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
    """Helper function for computing part of the ode1 covariance function.

    :param t: first time input.
    :type t: array
    :param index: Indices of first output.
    :type index: array of int
    :param t2: second time input.
    :type t2: array
    :param index2: Indices of second output.
    :type index2: array of int
    :param update_derivatives: whether to update derivatives (default is False).
        When True the results are stored on self._dh_ddecay,
        self._dh_ddecay2 and self._dh_dsigma.
    :param stationary: must be False; the stationary version is not implemented.
    :return h : result of this subcomponent of the kernel for the given values.
    :rtype: ndarray
    :raises NotImplementedError: if stationary is True
    """
    if stationary:
        raise NotImplementedError("Error, stationary version of this covariance not yet implemented.")

    # Vector of decays and delays associated with each output.
    Decay = self.decay[index]
    Decay2 = self.decay[index2]
    t_mat = t[:, None]
    t2_mat = t2[None, :]
    if self.delay is not None:
        Delay = self.delay[index]
        Delay2 = self.delay[index2]
        # Subtract out of place: t_mat and t2_mat are views of the caller's
        # arrays, and callers pass the same array for t and t2 in the
        # symmetric case, so an in-place update would shift the times twice.
        t_mat = t_mat - Delay[:, None]
        t2_mat = t2_mat - Delay2[None, :]

    diff_t = (t_mat - t2_mat)
    inv_sigma_diff_t = 1./self.sigma*diff_t
    half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]

    ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
                                    half_sigma_decay_i - inv_sigma_diff_t,
                                    return_sign=True)
    ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
                                    half_sigma_decay_i - t_mat/self.sigma,
                                    return_sign=True)

    # The exponentials are assembled in log space and the sign re-applied.
    h = sign1*np.exp(half_sigma_decay_i
                     *half_sigma_decay_i
                     -Decay[:, None]*diff_t+ln_part_1
                     -np.log(Decay[:, None] + Decay2[None, :]))
    h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
                      -Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
                      -np.log(Decay[:, None] + Decay2[None, :]))

    if update_derivatives:
        sigma2 = self.sigma*self.sigma
        # Update ith decay gradient
        dh_ddecay = ((0.5*Decay[:, None]*sigma2*(Decay[:, None] + Decay2[None, :])-1)*h
                     + (-diff_t*sign1*np.exp(
                            half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*diff_t+ln_part_1
                        )
                        +t_mat*sign2*np.exp(
                            half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*t_mat
                            - Decay2*t2_mat+ln_part_2))
                     +self.sigma/np.sqrt(np.pi)*(
                         -np.exp(
                             -diff_t*diff_t/sigma2
                         )+np.exp(
                             -t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat
                         )+np.exp(
                             -t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat
                         )-np.exp(
                             -(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
                         )
                     ))
        self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real

        # Update jth decay gradient
        dh_ddecay2 = (t2_mat*sign2
                      *np.exp(
                          half_sigma_decay_i*half_sigma_decay_i
                          -(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
                          +ln_part_2
                      )
                      -h)
        # Uses dh_ddecay2; the ith-decay expression was previously stored here by mistake.
        self._dh_ddecay2 = (dh_ddecay2/(Decay[:, None] + Decay2[None, :])).real

        # Update sigma gradient
        self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
                           + 2/(np.sqrt(np.pi)
                                *(Decay[:, None]+Decay2[None, :]))
                           *((-diff_t/sigma2-Decay[:, None]/2)
                             *np.exp(-diff_t*diff_t/sigma2)
                             + (-t2_mat/sigma2+Decay[:, None]/2)
                             *np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
                             - (-t_mat/sigma2-Decay[:, None]/2)
                             *np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
                             - Decay[:, None]/2
                             *np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))

    return h

View file

@ -9,7 +9,7 @@ import GPy
class Gibbs(Kernpart): class Gibbs(Kernpart):
""" """
Gibbs and MacKay non-stationary covariance function. Gibbs non-stationary covariance function.
.. math:: .. math::
@ -25,7 +25,10 @@ class Gibbs(Kernpart):
with input location. This leads to an additional term in front of with input location. This leads to an additional term in front of
the kernel. the kernel.
The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used. The parameters are :math:`\sigma^2`, the process variance, and
the parameters of l(x) which is a function that can be
specified by the user, by default a multi-layer perceptron is
used.
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
@ -37,6 +40,15 @@ class Gibbs(Kernpart):
:type ARD: Boolean :type ARD: Boolean
:rtype: Kernpart object :rtype: Kernpart object
See Mark Gibbs's thesis for more details: Gibbs,
M. N. (1997). Bayesian Gaussian Processes for Regression and
Classification. PhD thesis, Department of Physics, University of
Cambridge. Or also see Page 93 of Gaussian Processes for Machine
Learning by Rasmussen and Williams. Although note that we do not
constrain the lengthscale to be positive by default. This allows
anticorrelation to occur. The positive constraint can be included
by the user manually.
""" """
def __init__(self, input_dim, variance=1., mapping=None, ARD=False): def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
@ -89,11 +101,17 @@ class Gibbs(Kernpart):
"""Derivative of the covariance matrix with respect to X.""" """Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent. # First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2) self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :] if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2)) dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0) target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale. # Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK) self._dK_computations(dL_dK)
if X2 is None:
target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
else:
target += self.mapping.df_dX(self._dL_dl[:, None], X) target += self.mapping.df_dX(self._dL_dl[:, None], X)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
@ -102,7 +120,8 @@ class Gibbs(Kernpart):
def dKdiag_dtheta(self, dL_dKdiag, X, target): def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters.""" """Gradient of diagonal of covariance with respect to parameters."""
pass target[0] += np.sum(dL_dKdiag)
def _K_computations(self, X, X2=None): def _K_computations(self, X, X2=None):

104
GPy/kern/parts/hetero.py Normal file
View file

@ -0,0 +1,104 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from IPython.core.debugger import Tracer; debug_here=Tracer()
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Hetero(Kernpart):
    r"""
    Heteroscedastic noise which depends on input location. See, for example,
    the paper by Goldberg et al. cited below.

    .. math::

       k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)

    where :math:`\sigma^2(x)` is a function giving the variance as a
    function of input space and :math:`\delta_{i,j}` is the Kronecker
    delta function.

    The parameters are the parameters of :math:`\sigma^2(x)`, which is a
    function that can be specified by the user; by default a
    multi-layer perceptron is used.

    TODO: Need to constrain the function outputs positive (still thinking
    of the best way of doing this; the intention is to use transformations).
    Currently the mapping output is just squared (see ``_Kdiag``); the
    ``transform`` argument is stored on the instance but is not applied by
    any method of this class.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param mapping: the mapping whose squared output gives the noise variance across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
    :type mapping: GPy.core.Mapping
    :param transform: transformation object (by default GPy.core.transformations.logexp()); currently only stored.
    :rtype: Kernpart object

    See this paper:

    Goldberg, P. W. Williams, C. K. I. and Bishop,
    C. M. (1998) Regression with Input-dependent Noise: a Gaussian
    Process Treatment In Advances in Neural Information Processing
    Systems, Volume 10, pp. 493-499. MIT Press

    for a Gaussian process treatment of this problem.
    """

    def __init__(self, input_dim, mapping=None, transform=None):
        self.input_dim = input_dim
        # Test against None explicitly so that a user-supplied object is
        # never replaced just because it happens to evaluate as falsy.
        if mapping is None:
            mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
        if transform is None:
            transform = GPy.core.transformations.logexp()
        self.transform = transform
        self.mapping = mapping
        self.name = 'hetero'
        # The kernel has no parameters of its own: everything is delegated
        # to the mapping.
        self.num_params = self.mapping.num_params
        self._set_params(self.mapping._get_params())

    def _get_params(self):
        """Return the parameter vector of the underlying mapping."""
        return self.mapping._get_params()

    def _set_params(self, x):
        """Set the parameters of the underlying mapping from the vector x."""
        assert x.size == (self.num_params)
        self.mapping._set_params(x)

    def _get_param_names(self):
        """Return the parameter names of the underlying mapping."""
        return self.mapping._get_param_names()

    def K(self, X, X2, target):
        """Add the covariance between X and X2 to target (in place).

        Only the diagonal is touched, and only when X2 is None or is the
        very same object as X; for any other X2 nothing is added.
        """
        if (X2 is None) or (X2 is X):
            target[np.diag_indices_from(target)] += self._Kdiag(X)

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance matrix for X to target."""
        target += self._Kdiag(X)

    def _Kdiag(self, X):
        """Helper function for computing the diagonal elements of the covariance."""
        # Squaring the mapping output keeps the variance non-negative.
        return self.mapping.f(X).flatten()**2

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Derivative of the covariance with respect to the parameters."""
        if (X2 is None) or (X2 is X):
            # Stride of shape[0]+1 through the flattened matrix walks the
            # diagonal of dL_dK.
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
            self.dKdiag_dtheta(dL_dKdiag, X, target)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to parameters."""
        # Chain rule for f(X)**2: d/dtheta = 2 f(X) df/dtheta.
        target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)

    def dK_dX(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        # Identity test, not `X2 == None`: the latter is an elementwise
        # comparison on numpy arrays and makes the `or` ambiguous.
        if (X2 is None) or (X2 is X):
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
            self.dKdiag_dX(dL_dKdiag, X, target)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X."""
        # Chain rule for f(X)**2: d/dX = 2 f(X) df/dX.
        target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)

View file

@ -58,6 +58,50 @@ class Kernpart(object):
raise NotImplementedError raise NotImplementedError
def dK_dX(self, dL_dK, X, X2, target): def dK_dX(self, dL_dK, X, X2, target):
raise NotImplementedError raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
    """Gradient of the covariance diagonal with respect to X.

    This base implementation always raises NotImplementedError;
    presumably each kernel part is expected to override it (confirm
    against the subclasses).
    """
    raise NotImplementedError
class Kernpart_stationary(Kernpart):
    """Base kernel part holding lengthscale state and caches for stationary covariances."""

    def __init__(self, input_dim, lengthscale=None, ARD=False):
        """Store the lengthscale(s) and create empty caches.

        With ARD there is one lengthscale per input dimension, otherwise a
        single shared one; num_params is the lengthscale count plus one.
        """
        self.input_dim = input_dim
        self.ARD = ARD

        # Pick the expected lengthscale count and the matching assertion
        # message once, then share the remaining logic between both modes.
        if ARD:
            n_scales = self.input_dim
            size_msg = "bad number of lengthscales"
        else:
            n_scales = 1
            size_msg = "Only one lengthscale needed for non-ARD kernel"
        self.num_params = n_scales + 1

        if lengthscale is None:
            self.lengthscale = np.ones(n_scales)
        else:
            self.lengthscale = np.asarray(lengthscale)
            assert self.lengthscale.size == n_scales, size_msg

        # initialize cache (uninitialised placeholder arrays)
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._params = np.empty(shape=(3, 1))

    def _set_params(self, x):
        """Install x as the lengthscale, refresh its square and drop the caches."""
        self.lengthscale = x
        self.lengthscale2 = np.square(self.lengthscale)
        # Cached Z, mu, S and X, X2, params belong to the old parameters,
        # so replace them with fresh placeholders.
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._params = np.empty(shape=(3, 1))

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Accumulate the gradient of the covariance diagonal w.r.t. the parameters."""
        # For stationary covariances the derivative of the diagonal
        # elements wrt lengthscale is 0, so only target[0] is updated.
        diag_total = np.sum(dL_dKdiag)
        target[0] += diag_total

    def dKdiag_dX(self, dL_dK, X, target):
        """Leave target untouched."""
        pass  # true for all stationary kernels
class Kernpart_inner(Kernpart): class Kernpart_inner(Kernpart):
def __init__(self,input_dim): def __init__(self,input_dim):
@ -74,3 +118,5 @@ class Kernpart_inner(Kernpart):
# initialize cache # initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1)) self._X, self._X2, self._params = np.empty(shape=(3, 1))

View file

@ -99,6 +99,9 @@ class Linear(Kernpart):
target += tmp.sum() target += tmp.sum()
def dK_dX(self, dL_dK, X, X2, target): def dK_dX(self, dL_dK, X, X2, target):
if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self,dL_dKdiag,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):

View file

@ -7,11 +7,13 @@ four_over_tau = 2./np.pi
class MLP(Kernpart): class MLP(Kernpart):
""" """
multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
.. math:: .. math::
k(x,y) = \sigma^2 \frac{2}{\pi} \text{asin} \left(\frac{\sigma_w^2 x^\top y+\sigma_b^2}{\sqrt{\sigma_w^2x^\top x + \sigma_b^2 + 1}\sqrt{\sigma_w^2 y^\top y + \sigma_b^2 +1}} \right) k(x,y) = \\sigma^{2}\\frac{2}{\\pi } \\text{asin} \\left ( \\frac{ \\sigma_w^2 x^\\top y+\\sigma_b^2}{\\sqrt{\\sigma_w^2x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y + \\sigma_b^2 +1}} \\right )
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
@ -24,6 +26,7 @@ class MLP(Kernpart):
:type ARD: Boolean :type ARD: Boolean
:rtype: Kernpart object :rtype: Kernpart object
""" """
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False): def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=100., ARD=False):
@ -110,9 +113,13 @@ class MLP(Kernpart):
arg = self._K_asin_arg arg = self._K_asin_arg
numer = self._K_numer numer = self._K_numer
denom = self._K_denom denom = self._K_denom
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
denom3 = denom*denom*denom denom3 = denom*denom*denom
if X2 is not None:
vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1) target += four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
else:
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
target += 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X""" """Gradient of diagonal of covariance with respect to X"""

38
GPy/kern/parts/odekern1.c Normal file
View file

@ -0,0 +1,38 @@
#include <math.h>
double k_uu(t1,t2,theta1,theta2,sig1,sig2)
{
double kern=0;
double dist=0;
dist = sqrt(t2*t2-t1*t1)
kern = sig1*(1+theta1*dist)*exp(-theta1*dist)
return kern;
}
double k_yy(t1, t2, theta1,theta2,sig1,sig2)
{
double kern=0;
double dist=0;
dist = sqrt(t2*t2-t1*t1)
kern = sig1*sig2 * ( exp(-theta1*dist)*(theta2-2*theta1+theta1*theta2*dist-theta1*theta1*dist) +
exp(-dist) ) / ((theta2-theta1)*(theta2-theta1))
return kern;
}

View file

@ -7,22 +7,22 @@ four_over_tau = 2./np.pi
class POLY(Kernpart): class POLY(Kernpart):
""" """
polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel,
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
.. math:: .. math::
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^2)^d
k(x, y) = \sigma^2*(\sigma_w^2 x'y+\sigma_b^b)^d The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
The kernel parameters are \sigma^2 (variance), \sigma^2_w
(weight_variance), \sigma^2_b (bias_variance) and d
(degree). Only gradients of the first three are provided for (degree). Only gradients of the first three are provided for
kernel optimisation, it is assumed that polynomial degree would kernel optimisation, it is assumed that polynomial degree would
be set by hand. be set by hand.
The kernel is not recommended as it is badly behaved when the The kernel is not recommended as it is badly behaved when the
\sigma^2_w*x'*y + \sigma^2_b has a magnitude greater than one. For completeness :math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
there is an automatic relevance determination version of this there is an automatic relevance determination version of this
kernel provided. kernel provided (NOT YET IMPLEMENTED!).
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
@ -32,7 +32,7 @@ class POLY(Kernpart):
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b` :param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param degree: the degree of the polynomial. :param degree: the degree of the polynomial.
:type degree: int :type degree: int
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension. :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: Kernpart object :rtype: Kernpart object
@ -103,6 +103,9 @@ class POLY(Kernpart):
"""Derivative of the covariance matrix with respect to X""" """Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2) self._K_computations(X, X2)
arg = self._K_poly_arg arg = self._K_poly_arg
if X2 is None:
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
else:
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1) target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):

View file

@ -2,6 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart from kernpart import Kernpart
from coregionalize import Coregionalize
import numpy as np import numpy as np
import hashlib import hashlib
@ -18,7 +19,7 @@ class Prod(Kernpart):
""" """
def __init__(self,k1,k2,tensor=False): def __init__(self,k1,k2,tensor=False):
self.num_params = k1.num_params + k2.num_params self.num_params = k1.num_params + k2.num_params
self.name = k1.name + '<times>' + k2.name self.name = '['+k1.name + '**' + k2.name +']'
self.k1 = k1 self.k1 = k1
self.k2 = k2 self.k2 = k2
if tensor: if tensor:
@ -90,6 +91,16 @@ class Prod(Kernpart):
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None:
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
#NOTE The indices column in the inputs makes the ki.dK_dX fail when passing None instead of X[:,self.slicei]
X2 = X
self.k1.dK_dX(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
else:
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1]) self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2]) self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])

View file

@ -57,7 +57,7 @@ class RationalQuadratic(Kernpart):
dist2 = np.square((X-X2.T)/self.lengthscale) dist2 = np.square((X-X2.T)/self.lengthscale)
dvar = (1 + dist2/2.)**(-self.power) dvar = (1 + dist2/2.)**(-self.power)
dl = self.power * self.variance * dist2 * self.lengthscale**(-3) * (1 + dist2/2./self.power)**(-self.power-1) dl = self.power * self.variance * dist2 / self.lengthscale * (1 + dist2/2.)**(-self.power-1)
dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power) dp = - self.variance * np.log(1 + dist2/2.) * (1 + dist2/2.)**(-self.power)
target[0] += np.sum(dvar*dL_dK) target[0] += np.sum(dvar*dL_dK)
@ -70,9 +70,11 @@ class RationalQuadratic(Kernpart):
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None:
dist2 = np.square((X-X.T)/self.lengthscale)
dX = -2.*self.variance*self.power * (X-X.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
else:
dist2 = np.square((X-X2.T)/self.lengthscale) dist2 = np.square((X-X2.T)/self.lengthscale)
dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1) dX = -self.variance*self.power * (X-X2.T)/self.lengthscale**2 * (1 + dist2/2./self.lengthscale)**(-self.power-1)
target += np.sum(dL_dK*dX,1)[:,np.newaxis] target += np.sum(dL_dK*dX,1)[:,np.newaxis]

View file

@ -138,6 +138,9 @@ class RBF(Kernpart):
def dK_dX(self, dL_dK, X, X2, target): def dK_dX(self, dL_dK, X, X2, target):
self._K_computations(X, X2) self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

View file

@ -133,6 +133,9 @@ class RBFInv(RBF):
def dK_dX(self, dL_dK, X, X2, target): def dK_dX(self, dL_dK, X, X2, target):
self._K_computations(X, X2) self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena. _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2)) dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)

View file

@ -1,6 +1,7 @@
#include <math.h> #include <math.h>
double DiracDelta(double x){ double DiracDelta(double x){
if((x<0.000001) & (x>-0.000001))//go on, laught at my c++ skills // TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
return 1.0; return 1.0;
else else
return 0.0; return 0.0;
@ -8,3 +9,17 @@ double DiracDelta(double x){
double DiracDelta(double x,int foo){ double DiracDelta(double x,int foo){
return 0.0; return 0.0;
}; };
// Unnormalised sinc: sin(x)/x, with the value 1 substituted at x == 0
// where the quotient itself would be 0/0.
double sinc(double x){
    return (x == 0) ? 1.0 : sin(x)/x;
}
// Derivative of sin(x)/x, i.e. (x*cos(x) - sin(x))/x^2, with the value 0
// substituted at x == 0 where the quotient itself would be 0/0.
double sinc_grad(double x){
    return (x == 0) ? 0.0 : (x*cos(x) - sin(x))/(x*x);
}

View file

@ -1,3 +1,6 @@
#include <math.h> #include <math.h>
double DiracDelta(double x); double DiracDelta(double x);
double DiracDelta(double x, int foo); double DiracDelta(double x, int foo);
double sinc(double x);
double sinc_grad(double x);

View file

@ -26,8 +26,11 @@ class spkern(Kernpart):
- to handle multiple inputs, call them x1, z1, etc - to handle multiple inputs, call them x1, z1, etc
- to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
""" """
def __init__(self,input_dim,k,param=None): def __init__(self,input_dim,k,name=None,param=None):
if name is None:
self.name='sympykern' self.name='sympykern'
else:
self.name = name
self._sp_k = k self._sp_k = k
sp_vars = [e for e in k.atoms() if e.is_Symbol] sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:])) self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
@ -56,9 +59,9 @@ class spkern(Kernpart):
self.weave_kwargs = {\ self.weave_kwargs = {\
'support_code':self._function_code,\ 'support_code':self._function_code,\
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\ 'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],\
'headers':['"sympy_helpers.h"'],\ 'headers':['"sympy_helpers.h"'],\
'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\ 'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],\
#'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\ #'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
'extra_compile_args':[],\ 'extra_compile_args':[],\
'extra_link_args':['-lgomp'],\ 'extra_link_args':['-lgomp'],\
@ -109,14 +112,15 @@ class spkern(Kernpart):
f.write(self._function_header) f.write(self._function_header)
f.close() f.close()
#get rid of derivatives of DiracDelta # Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
#Here's some code to do the looping for K # Here's the code to do the looping for K
arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\ arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]
+ ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\ + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]
+ ["param[%i]"%i for i in range(self.num_params)]) + ["param[%i]"%i for i in range(self.num_params)])
self._K_code =\ self._K_code =\
""" """
int i; int i;
@ -133,9 +137,14 @@ class spkern(Kernpart):
%s %s
"""%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Similar code when only X is provided.
self._K_code_X = self._K_code.replace('Z[', 'X[')
# Code to compute diagonal of covariance.
diag_arglist = re.sub('Z','X',arglist) diag_arglist = re.sub('Z','X',arglist)
diag_arglist = re.sub('j','i',diag_arglist) diag_arglist = re.sub('j','i',diag_arglist)
#Here's some code to do the looping for Kdiag # Code to do the looping for Kdiag
self._Kdiag_code =\ self._Kdiag_code =\
""" """
int i; int i;
@ -148,8 +157,9 @@ class spkern(Kernpart):
%s %s
"""%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#here's some code to compute gradients # Code to compute gradients
funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)]) funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])
self._dK_dtheta_code =\ self._dK_dtheta_code =\
""" """
int i; int i;
@ -164,9 +174,12 @@ class spkern(Kernpart):
} }
} }
%s %s
"""%(funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(funclist,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
#here's some code to compute gradients for Kdiag TODO: thius is yucky. # Similar code when only X is provided, change argument lists.
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_funclist = re.sub('Z','X',funclist,count=0) diag_funclist = re.sub('Z','X',funclist,count=0)
diag_funclist = re.sub('j','i',diag_funclist) diag_funclist = re.sub('j','i',diag_funclist)
diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist) diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
@ -181,8 +194,12 @@ class spkern(Kernpart):
%s %s
"""%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#Here's some code to do gradients wrt x # Code for gradients wrt X
gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)]) gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
if False:
gradient_funcs += """if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}"""
self._dK_dX_code = \ self._dK_dX_code = \
""" """
int i; int i;
@ -194,28 +211,32 @@ class spkern(Kernpart):
for (i=0;i<N; i++){ for (i=0;i<N; i++){
for (j=0; j<num_inducing; j++){ for (j=0; j<num_inducing; j++){
%s %s
//if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
//if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}
} }
} }
%s %s
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#now for gradients of Kdiag wrt X # Create code for call when just X is passed as argument.
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= partial[', '+= 2*partial[')
diag_gradient_funcs = re.sub('Z','X',gradient_funcs,count=0)
diag_gradient_funcs = re.sub('j','i',diag_gradient_funcs)
diag_gradient_funcs = re.sub('partial\[i\*num_inducing\+i\]','2*partial[i]',diag_gradient_funcs)
# Code for gradients of Kdiag wrt X
self._dKdiag_dX_code= \ self._dKdiag_dX_code= \
""" """
int i;
int j;
int N = partial_array->dimensions[0]; int N = partial_array->dimensions[0];
int num_inducing = 0;
int input_dim = X_array->dimensions[1]; int input_dim = X_array->dimensions[1];
for (i=0;i<N; i++){ for (int i=0;i<N; i++){
j = i;
%s %s
} }
%s %s
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(diag_gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't
# diag_funclist called here? Need to check that.
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
#TODO: insert multiple functions here via string manipulation #TODO: insert multiple functions here via string manipulation
@ -223,6 +244,9 @@ class spkern(Kernpart):
def K(self,X,Z,target): def K(self,X,Z,target):
param = self._param param = self._param
if Z is None:
weave.inline(self._K_code_X,arg_names=['target','X','param'],**self.weave_kwargs)
else:
weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs) weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
def Kdiag(self,X,target): def Kdiag(self,X,target):
@ -231,21 +255,25 @@ class spkern(Kernpart):
def dK_dtheta(self,partial,X,Z,target): def dK_dtheta(self,partial,X,Z,target):
param = self._param param = self._param
weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs) if Z is None:
weave.inline(self._dK_dtheta_code_X, arg_names=['target','X','param','partial'],**self.weave_kwargs)
else:
weave.inline(self._dK_dtheta_code, arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,partial,X,target):
param = self._param param = self._param
Z = X weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dK_dX(self,partial,X,Z,target): def dK_dX(self,partial,X,Z,target):
param = self._param param = self._param
if Z is None:
weave.inline(self._dK_dX_code_X,arg_names=['target','X','param','partial'],**self.weave_kwargs)
else:
weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs) weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,partial,X,target):
param = self._param param = self._param
Z = X weave.inline(self._dKdiag_dX_code,arg_names=['target','X','param','partial'],**self.weave_kwargs)
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
def _set_params(self,param): def _set_params(self,param):
#print param.flags['C_CONTIGUOUS'] #print param.flags['C_CONTIGUOUS']

View file

@ -1,5 +1,6 @@
from ep import EP from ep import EP
from laplace import Laplace from laplace import Laplace
from ep_mixed_noise import EP_Mixed_Noise
from gaussian import Gaussian from gaussian import Gaussian
# TODO: from Laplace import Laplace from gaussian_mixed_noise import Gaussian_Mixed_Noise
import likelihood_functions as functions from noise_model_constructors import *

View file

@ -4,45 +4,46 @@ from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood from likelihood import likelihood
class EP(likelihood): class EP(likelihood):
def __init__(self,data,LikelihoodFunction,epsilon=1e-3,power_ep=[1.,1.]): def __init__(self,data,noise_model):
""" """
Expectation Propagation Expectation Propagation
Arguments :param data: data to model
--------- :type data: numpy array
epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float) :param noise_model: noise distribution
LikelihoodFunction : a likelihood function (see likelihood_functions.py) :type noise_model: A GPy noise model
""" """
self.LikelihoodFunction = LikelihoodFunction self.noise_model = noise_model
self.epsilon = epsilon
self.eta, self.delta = power_ep
self.data = data self.data = data
self.N, self.output_dim = self.data.shape self.num_data, self.output_dim = self.data.shape
self.is_heteroscedastic = True self.is_heteroscedastic = True
self.Nparams = 0 self.Nparams = 0
self._transf_data = self.LikelihoodFunction._preprocess_values(data) self._transf_data = self.noise_model._preprocess_values(data)
#Initial values - Likelihood approximation parameters: #Initial values - Likelihood approximation parameters:
#p(y|f) = t(f|tau_tilde,v_tilde) #p(y|f) = t(f|tau_tilde,v_tilde)
self.tau_tilde = np.zeros(self.N) self.tau_tilde = np.zeros(self.num_data)
self.v_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.num_data)
#initial values for the GP variables #initial values for the GP variables
self.Y = np.zeros((self.N,1)) self.Y = np.zeros((self.num_data,1))
self.covariance_matrix = np.eye(self.N) self.covariance_matrix = np.eye(self.num_data)
self.precision = np.ones(self.N)[:,None] self.precision = np.ones(self.num_data)[:,None]
self.Z = 0 self.Z = 0
self.YYT = None self.YYT = None
self.V = self.precision * self.Y self.V = self.precision * self.Y
self.VVT_factor = self.V self.VVT_factor = self.V
self.trYYT = 0. self.trYYT = 0.
super(EP, self).__init__()
def restart(self): def restart(self):
self.tau_tilde = np.zeros(self.N) self.tau_tilde = np.zeros(self.num_data)
self.v_tilde = np.zeros(self.N) self.v_tilde = np.zeros(self.num_data)
self.Y = np.zeros((self.N,1)) self.Y = np.zeros((self.num_data,1))
self.covariance_matrix = np.eye(self.N) self.covariance_matrix = np.eye(self.num_data)
self.precision = np.ones(self.N)[:,None] self.precision = np.ones(self.num_data)[:,None]
self.Z = 0 self.Z = 0
self.YYT = None self.YYT = None
self.V = self.precision * self.Y self.V = self.precision * self.Y
@ -52,16 +53,23 @@ class EP(likelihood):
def predictive_values(self,mu,var,full_cov): def predictive_values(self,mu,var,full_cov):
if full_cov: if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood" raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.LikelihoodFunction.predictive_values(mu,var) return self.noise_model.predictive_values(mu,var)
def _get_params(self): def _get_params(self):
return np.zeros(0) #return np.zeros(0)
return self.noise_model._get_params()
def _get_param_names(self): def _get_param_names(self):
return [] #return []
return self.noise_model._get_param_names()
def _set_params(self,p): def _set_params(self,p):
pass # TODO: the EP likelihood might want to take some parameters... #pass # TODO: the EP likelihood might want to take some parameters...
self.noise_model._set_params(p)
def _gradients(self,partial): def _gradients(self,partial):
return np.zeros(0) # TODO: the EP likelihood might want to take some parameters... #return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
return self.noise_model._gradients(partial)
def _compute_GP_variables(self): def _compute_GP_variables(self):
#Variables to be called from GP #Variables to be called from GP
@ -69,6 +77,7 @@ class EP(likelihood):
sigma_sum = 1./self.tau_ + 1./self.tau_tilde sigma_sum = 1./self.tau_ + 1./self.tau_tilde
mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2 mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep
self.Z += 0.5*self.num_data*np.log(2*np.pi)
self.Y = mu_tilde[:,None] self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T) self.YYT = np.dot(self.Y,self.Y.T)
@ -78,13 +87,22 @@ class EP(likelihood):
self.VVT_factor = self.V self.VVT_factor = self.V
self.trYYT = np.trace(self.YYT) self.trYYT = np.trace(self.YYT)
def fit_full(self,K): def fit_full(self, K, epsilon=1e-3,power_ep=[1.,1.]):
""" """
The expectation-propagation algorithm. The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006. For nomenclature see Rasmussen & Williams 2006.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
""" """
self.epsilon = epsilon
self.eta, self.delta = power_ep
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N) mu = np.zeros(self.num_data)
Sigma = K.copy() Sigma = K.copy()
""" """
@ -93,15 +111,15 @@ class EP(likelihood):
sigma_ = 1./tau_ sigma_ = 1./tau_
mu_ = v_/tau_ mu_ = v_/tau_
""" """
self.tau_ = np.empty(self.N,dtype=float) self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.N,dtype=float) self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments #Initial values - Marginal moments
z = np.empty(self.N,dtype=float) z = np.empty(self.num_data,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float) self.Z_hat = np.empty(self.num_data,dtype=float)
phi = np.empty(self.N,dtype=float) phi = np.empty(self.num_data,dtype=float)
mu_hat = np.empty(self.N,dtype=float) mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float) sigma2_hat = np.empty(self.num_data,dtype=float)
#Approximation #Approximation
epsilon_np1 = self.epsilon + 1. epsilon_np1 = self.epsilon + 1.
@ -110,13 +128,13 @@ class EP(likelihood):
self.np1 = [self.tau_tilde.copy()] self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()] self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N) update_order = np.random.permutation(self.num_data)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
@ -128,23 +146,32 @@ class EP(likelihood):
self.iterations += 1 self.iterations += 1
#Sigma recomptutation with Cholesky decompositon #Sigma recomptutation with Cholesky decompositon
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K B = np.eye(self.num_data) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B) L = jitchol(B)
V,info = dtrtrs(L,Sroot_tilde_K,lower=1) V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V) Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde) mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
self.np1.append(self.tau_tilde.copy()) self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy()) self.np2.append(self.v_tilde.copy())
return self._compute_GP_variables() return self._compute_GP_variables()
def fit_DTC(self, Kmm, Kmn): def fit_DTC(self, Kmm, Kmn, epsilon=1e-3,power_ep=[1.,1.]):
""" """
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013. For nomenclature see ... 2013.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
""" """
self.epsilon = epsilon
self.eta, self.delta = power_ep
num_inducing = Kmm.shape[0] num_inducing = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs! #TODO: this doesn't work with uncertain inputs!
@ -173,7 +200,7 @@ class EP(likelihood):
Sigma = Diag + P*R.T*R*P.T + K Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma mu = w + P*Gamma
""" """
mu = np.zeros(self.N) mu = np.zeros(self.num_data)
LLT = Kmm.copy() LLT = Kmm.copy()
Sigma_diag = Qnn_diag.copy() Sigma_diag = Qnn_diag.copy()
@ -183,15 +210,15 @@ class EP(likelihood):
sigma_ = 1./tau_ sigma_ = 1./tau_
mu_ = v_/tau_ mu_ = v_/tau_
""" """
self.tau_ = np.empty(self.N,dtype=float) self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.N,dtype=float) self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments #Initial values - Marginal moments
z = np.empty(self.N,dtype=float) z = np.empty(self.num_data,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float) self.Z_hat = np.empty(self.num_data,dtype=float)
phi = np.empty(self.N,dtype=float) phi = np.empty(self.num_data,dtype=float)
mu_hat = np.empty(self.N,dtype=float) mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float) sigma2_hat = np.empty(self.num_data,dtype=float)
#Approximation #Approximation
epsilon_np1 = 1 epsilon_np1 = 1
@ -200,13 +227,13 @@ class EP(likelihood):
np1 = [self.tau_tilde.copy()] np1 = [self.tau_tilde.copy()]
np2 = [self.v_tilde.copy()] np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N) update_order = np.random.permutation(self.num_data)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -229,18 +256,26 @@ class EP(likelihood):
Sigma_diag = np.sum(V*V,-2) Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,self.v_tilde) Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde) mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.num_data
np1.append(self.tau_tilde.copy()) np1.append(self.tau_tilde.copy())
np2.append(self.v_tilde.copy()) np2.append(self.v_tilde.copy())
self._compute_GP_variables() self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag): def fit_FITC(self, Kmm, Kmn, Knn_diag, epsilon=1e-3,power_ep=[1.,1.]):
""" """
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008. For nomenclature see Naish-Guzman and Holden, 2008.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param power_ep: Power EP parameters
:type power_ep: list of floats
""" """
self.epsilon = epsilon
self.eta, self.delta = power_ep
num_inducing = Kmm.shape[0] num_inducing = Kmm.shape[0]
""" """
@ -263,9 +298,9 @@ class EP(likelihood):
Sigma = Diag + P*R.T*R*P.T + K Sigma = Diag + P*R.T*R*P.T + K
mu = w + P*Gamma mu = w + P*Gamma
""" """
self.w = np.zeros(self.N) self.w = np.zeros(self.num_data)
self.Gamma = np.zeros(num_inducing) self.Gamma = np.zeros(num_inducing)
mu = np.zeros(self.N) mu = np.zeros(self.num_data)
P = P0.copy() P = P0.copy()
R = R0.copy() R = R0.copy()
Diag = Diag0.copy() Diag = Diag0.copy()
@ -278,15 +313,15 @@ class EP(likelihood):
sigma_ = 1./tau_ sigma_ = 1./tau_
mu_ = v_/tau_ mu_ = v_/tau_
""" """
self.tau_ = np.empty(self.N,dtype=float) self.tau_ = np.empty(self.num_data,dtype=float)
self.v_ = np.empty(self.N,dtype=float) self.v_ = np.empty(self.num_data,dtype=float)
#Initial values - Marginal moments #Initial values - Marginal moments
z = np.empty(self.N,dtype=float) z = np.empty(self.num_data,dtype=float)
self.Z_hat = np.empty(self.N,dtype=float) self.Z_hat = np.empty(self.num_data,dtype=float)
phi = np.empty(self.N,dtype=float) phi = np.empty(self.num_data,dtype=float)
mu_hat = np.empty(self.N,dtype=float) mu_hat = np.empty(self.num_data,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float) sigma2_hat = np.empty(self.num_data,dtype=float)
#Approximation #Approximation
epsilon_np1 = 1 epsilon_np1 = 1
@ -295,13 +330,13 @@ class EP(likelihood):
self.np1 = [self.tau_tilde.copy()] self.np1 = [self.tau_tilde.copy()]
self.np2 = [self.v_tilde.copy()] self.np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N) update_order = np.random.permutation(self.num_data)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -334,8 +369,8 @@ class EP(likelihood):
self.w = Diag * self.v_tilde self.w = Diag * self.v_tilde
self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde)) self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
mu = self.w + np.dot(P,self.Gamma) mu = self.w + np.dot(P,self.Gamma)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.num_data
epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.num_data
self.np1.append(self.tau_tilde.copy()) self.np1.append(self.tau_tilde.copy())
self.np2.append(self.v_tilde.copy()) self.np2.append(self.v_tilde.copy())

View file

@ -0,0 +1,385 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
class EP_Mixed_Noise(likelihood):
def __init__(self, data_list, noise_model_list, epsilon=1e-3, power_ep=(1., 1.)):
    """
    Expectation Propagation likelihood for several outputs, each with its own noise model.

    :param data_list: list of outputs, one array per noise model
    :param noise_model_list: a list of noise models, aligned with data_list
    :param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations
    :type epsilon: float
    :param power_ep: pair (eta, delta) of power EP parameters
    """
    assert len(data_list) == len(noise_model_list)
    self.noise_model_list = noise_model_list
    self.n_models = len(data_list)
    self.n_params = [noise_model._get_params().size for noise_model in noise_model_list]

    # index[j, 0] is the position in noise_model_list of the model owning stacked row j.
    # NOTE(review): row counts come from data.size, so this presumably assumes
    # single-column outputs -- confirm against callers.
    n_list = [data.size for data in data_list]
    self.index = np.vstack([np.repeat(i, n)[:, None] for i, n in enumerate(n_list)])

    self.epsilon = epsilon
    self.eta, self.delta = power_ep
    self.data = np.vstack(data_list)
    self.N, self.output_dim = self.data.shape
    self.is_heteroscedastic = True
    self.Nparams = 0  # FIXME
    self._transf_data = np.vstack([noise_model._preprocess_values(data)
                                   for noise_model, data in zip(noise_model_list, data_list)])
    #TODO non-gaussian index

    #Initial values - Likelihood approximation parameters:
    #p(y|f) = t(f|tau_tilde,v_tilde)
    self.tau_tilde = np.zeros(self.N)
    self.v_tilde = np.zeros(self.N)

    #initial values for the GP variables
    self.Y = np.zeros((self.N, 1))
    self.covariance_matrix = np.eye(self.N)
    self.precision = np.ones(self.N)[:, None]
    self.Z = 0
    self.YYT = None
    self.V = self.precision * self.Y
    self.VVT_factor = self.V
    self.trYYT = 0.
    # NOTE(review): unlike EP and Gaussian_Mixed_Noise, the base-class __init__ is
    # never invoked here -- confirm whether super().__init__() is required.
def restart(self):
    """Reset the site parameters and the derived GP variables to their initial values."""
    n = self.N
    # Site (likelihood approximation) parameters
    self.tau_tilde = np.zeros(n)
    self.v_tilde = np.zeros(n)
    # Variables consumed by the GP
    self.Y = np.zeros((n, 1))
    self.covariance_matrix = np.eye(n)
    self.precision = np.ones(n)[:, None]
    self.Z = 0
    self.YYT = None
    self.V = self.precision * self.Y
    self.VVT_factor = self.V
    self.trYYT = 0.
def predictive_values(self, mu, var, full_cov, noise_model):
    """
    Predicts the output given the GP

    :param mu: GP's mean
    :param var: GP's variance
    :param full_cov: whether to return the full covariance matrix, or just the diagonal
    :type full_cov: False|True
    :param noise_model: noise model to use
    :type noise_model: integer
    :raises NotImplementedError: if full_cov is requested
    """
    if full_cov:
        # Call form instead of `raise X, "msg"` so the module parses on Python 2 and 3.
        raise NotImplementedError("Cannot make correlated predictions with an EP likelihood")
    # A single noise model handles the whole batch; per-point dispatch is not done here.
    return self.noise_model_list[noise_model].predictive_values(mu, var)
def _get_params(self):
    """Return the parameters of every noise model, flattened and concatenated in list order."""
    flat_params = [noise_model._get_params().flatten() for noise_model in self.noise_model_list]
    return np.hstack(flat_params)
def _get_param_names(self):
    """Return the parameter names of every noise model, concatenated in list order."""
    return [name
            for noise_model in self.noise_model_list
            for name in noise_model._get_param_names()]
def _set_params(self, p):
    """
    Distribute the flat parameter vector among the noise models.

    :param p: parameters of all noise models, concatenated in list order;
              model i receives the next self.n_params[i] entries
    """
    bounds = np.cumsum([0] + self.n_params)
    for i, (start, stop) in enumerate(zip(bounds[:-1], bounds[1:])):
        self.noise_model_list[i]._set_params(p[start:stop])
def _gradients(self, partial):
    """
    Concatenate the gradients reported by every noise model.

    Each model receives the same, full `partial` argument.
    """
    #NOTE this is not tested
    per_model = [noise_model._gradients(partial) for noise_model in self.noise_model_list]
    return np.hstack(per_model)
def _compute_GP_variables(self):
    """
    Turn the EP site and cavity parameters into the variables read by the GP model.

    Reads tau_tilde, v_tilde (sites), tau_, v_ (cavities) and Z_hat; writes Z, Y,
    YYT, covariance_matrix, precision, V, VVT_factor and trYYT.
    """
    #Variables to be called from GP
    mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
    sigma_sum = 1./self.tau_ + 1./self.tau_tilde
    mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
    #Normalization constant, aka Z_ep
    # NOTE(review): EP._compute_GP_variables additionally adds 0.5*num_data*log(2*pi);
    # confirm whether that term is intentionally absent here.
    self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum)

    self.Y = mu_tilde[:,None]
    self.YYT = np.dot(self.Y,self.Y.T)
    self.covariance_matrix = np.diag(1./self.tau_tilde)
    self.precision = self.tau_tilde[:,None]
    self.V = self.precision * self.Y
    self.VVT_factor = self.V
    self.trYYT = np.trace(self.YYT)
def fit_full(self,K):
    """
    The expectation-propagation algorithm.
    For nomenclature see Rasmussen & Williams 2006.

    :param K: covariance matrix (N x N); a copy is used as the initial posterior covariance
    """
    #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
    mu = np.zeros(self.N)
    Sigma = K.copy()

    # Initial values - Cavity distribution parameters:
    # q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
    # sigma_ = 1./tau_
    # mu_ = v_/tau_
    self.tau_ = np.empty(self.N,dtype=float)
    self.v_ = np.empty(self.N,dtype=float)

    #Initial values - Marginal moments
    self.Z_hat = np.empty(self.N,dtype=float)
    mu_hat = np.empty(self.N,dtype=float)
    sigma2_hat = np.empty(self.N,dtype=float)

    #Approximation
    epsilon_np1 = self.epsilon + 1.
    epsilon_np2 = self.epsilon + 1.
    self.iterations = 0
    self.np1 = [self.tau_tilde.copy()]
    self.np2 = [self.v_tilde.copy()]
    while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
        update_order = np.random.permutation(self.N)
        for i in update_order:
            #Cavity distribution parameters
            self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
            self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
            #Marginal moments
            # self.index is an (N x 1) array: take the scalar entry, since a list
            # cannot be indexed with a length-1 ndarray on current NumPy.
            noise_model = self.noise_model_list[int(self.index[i,0])]
            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
            #Site parameters update
            Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
            Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
            self.tau_tilde[i] += Delta_tau
            self.v_tilde[i] += Delta_v
            #Posterior distribution parameters update
            DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
            mu = np.dot(Sigma,self.v_tilde)
            # NOTE(review): counted once per site update; the original nesting was
            # lost with the indentation -- confirm.
            self.iterations += 1
        #Sigma recomputation with Cholesky decomposition
        Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
        B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
        L = jitchol(B)
        V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
        Sigma = K - np.dot(V.T,V)
        mu = np.dot(Sigma,self.v_tilde)
        # Convergence: mean squared change of the site parameters over the sweep
        epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
        epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
        self.np1.append(self.tau_tilde.copy())
        self.np2.append(self.v_tilde.copy())

    return self._compute_GP_variables()
def fit_DTC(self, Kmm, Kmn):
    """
    The expectation-propagation algorithm with sparse pseudo-input.
    For nomenclature see ... 2013.

    :param Kmm: covariance between the inducing points (M x M)
    :param Kmn: cross-covariance, indexed as Kmn[:, i] per data point (M x N)
    """
    #TODO: this doesn't work with uncertain inputs!

    # Prior approximation parameters:
    # q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
    # Sigma0 = Qnn = Knm*Kmmi*Kmn
    Lm = jitchol(Kmm)
    Lmi = chol_inv(Lm)
    Kmmi = np.dot(Lmi.T,Lmi)
    KmmiKmn = np.dot(Kmmi,Kmn)
    Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
    LLT0 = Kmm.copy()

    # Posterior approximation: q(f|y) = N(f| mu, Sigma)
    # Sigma = Diag + P*R.T*R*P.T + K
    # mu = w + P*Gamma
    mu = np.zeros(self.N)
    LLT = Kmm.copy()
    Sigma_diag = Qnn_diag.copy()

    # Initial values - Cavity distribution parameters:
    # q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
    # sigma_ = 1./tau_
    # mu_ = v_/tau_
    self.tau_ = np.empty(self.N,dtype=float)
    self.v_ = np.empty(self.N,dtype=float)

    #Initial values - Marginal moments
    self.Z_hat = np.empty(self.N,dtype=float)
    mu_hat = np.empty(self.N,dtype=float)
    sigma2_hat = np.empty(self.N,dtype=float)

    #Approximation
    epsilon_np1 = 1
    epsilon_np2 = 1
    self.iterations = 0
    np1 = [self.tau_tilde.copy()]
    np2 = [self.v_tilde.copy()]
    while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
        update_order = np.random.permutation(self.N)
        for i in update_order:
            #Cavity distribution parameters
            self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
            self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
            #Marginal moments
            # self.index is an (N x 1) array: take the scalar entry, since a list
            # cannot be indexed with a length-1 ndarray on current NumPy.
            noise_model = self.noise_model_list[int(self.index[i,0])]
            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
            #Site parameters update
            Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
            Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
            self.tau_tilde[i] += Delta_tau
            self.v_tilde[i] += Delta_v
            #Posterior distribution parameters update
            DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
            L = jitchol(LLT)
            V,info = dtrtrs(L,Kmn,lower=1)
            Sigma_diag = np.sum(V*V,-2)
            si = np.sum(V.T*V[:,i],-1)
            mu += (Delta_v-Delta_tau*mu[i])*si
            # NOTE(review): counted once per site update; the original nesting was
            # lost with the indentation -- confirm.
            self.iterations += 1
        #Sigma recomputation with Cholesky decomposition
        LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
        L = jitchol(LLT)
        V,info = dtrtrs(L,Kmn,lower=1)
        V2,info = dtrtrs(L.T,V,lower=0)
        Sigma_diag = np.sum(V*V,-2)
        Knmv_tilde = np.dot(Kmn,self.v_tilde)
        mu = np.dot(V2.T,Knmv_tilde)
        # Convergence: mean squared change of the site parameters over the sweep
        epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
        epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
        np1.append(self.tau_tilde.copy())
        np2.append(self.v_tilde.copy())

    # Returned for parity with fit_full/fit_FITC (the helper itself returns None).
    return self._compute_GP_variables()
def fit_FITC(self, Kmm, Kmn, Knn_diag):
    """
    The expectation-propagation algorithm with sparse pseudo-input.
    For nomenclature see Naish-Guzman and Holden, 2008.

    :param Kmm: covariance between the inducing points (M x M)
    :param Kmn: cross-covariance; its transpose is used as the (N x M) matrix P0
    :param Knn_diag: diagonal of the full covariance, one entry per data point
    """
    num_inducing = Kmm.shape[0]

    # Prior approximation parameters:
    # q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
    # Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
    Lm = jitchol(Kmm)
    Lmi = chol_inv(Lm)
    Kmmi = np.dot(Lmi.T,Lmi)
    P0 = Kmn.T
    KmmiKmn = np.dot(Kmmi,P0.T)
    Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
    Diag0 = Knn_diag - Qnn_diag
    R0 = jitchol(Kmmi).T

    # Posterior approximation: q(f|y) = N(f| mu, Sigma)
    # Sigma = Diag + P*R.T*R*P.T + K
    # mu = w + P*Gamma
    self.w = np.zeros(self.N)
    self.Gamma = np.zeros(num_inducing)
    mu = np.zeros(self.N)
    P = P0.copy()
    R = R0.copy()
    Diag = Diag0.copy()
    Sigma_diag = Knn_diag  # rebound (never mutated in place) below, so no copy is taken
    RPT0 = np.dot(R0,P0.T)

    # Initial values - Cavity distribution parameters:
    # q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
    # sigma_ = 1./tau_
    # mu_ = v_/tau_
    self.tau_ = np.empty(self.N,dtype=float)
    self.v_ = np.empty(self.N,dtype=float)

    #Initial values - Marginal moments
    self.Z_hat = np.empty(self.N,dtype=float)
    mu_hat = np.empty(self.N,dtype=float)
    sigma2_hat = np.empty(self.N,dtype=float)

    #Approximation
    epsilon_np1 = 1
    epsilon_np2 = 1
    self.iterations = 0
    self.np1 = [self.tau_tilde.copy()]
    self.np2 = [self.v_tilde.copy()]
    while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
        update_order = np.random.permutation(self.N)
        for i in update_order:
            #Cavity distribution parameters
            self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
            self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
            #Marginal moments
            # self.index is an (N x 1) array: take the scalar entry, since a list
            # cannot be indexed with a length-1 ndarray on current NumPy.
            noise_model = self.noise_model_list[int(self.index[i,0])]
            self.Z_hat[i], mu_hat[i], sigma2_hat[i] = noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
            #Site parameters update
            Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
            Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
            self.tau_tilde[i] += Delta_tau
            self.v_tilde[i] += Delta_v
            #Posterior distribution parameters update
            dtd1 = Delta_tau*Diag[i] + 1.
            dii = Diag[i]
            Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
            # pi_ is a view of row i of P, so the assignment below also changes pi_;
            # the statement order here is significant.
            pi_ = P[i,:].reshape(1,num_inducing)
            P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
            Rp_i = np.dot(R,pi_.T)
            RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
            R = jitchol(RTR).T
            self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
            self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
            RPT = np.dot(R,P.T)
            Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
            mu = self.w + np.dot(P,self.Gamma)
            # NOTE(review): counted once per site update; the original nesting was
            # lost with the indentation -- confirm.
            self.iterations += 1
        #Sigma recomputation with Cholesky decomposition
        Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
        Diag = Diag0 * Iplus_Dprod_i
        P = Iplus_Dprod_i[:,None] * P0
        safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
        L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
        R,info = dtrtrs(L,R0,lower=1)
        RPT = np.dot(R,P.T)
        Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
        self.w = Diag * self.v_tilde
        self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
        mu = self.w + np.dot(P,self.Gamma)
        # Convergence: mean squared change of the site parameters over the sweep
        epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
        epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
        self.np1.append(self.tau_tilde.copy())
        self.np2.append(self.v_tilde.copy())

    return self._compute_GP_variables()

View file

@ -7,9 +7,9 @@ class Gaussian(likelihood):
""" """
Likelihood class for doing Expectation propagation Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray) :param data: observed output
..Note:: Y values allowed depend on the likelihood_function used :type data: Nx1 numpy.darray
:param variance : :param variance: noise parameter
:param normalize: whether to normalize the data before computing (predictions will be in original scales) :param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True :type normalize: False|True
""" """
@ -34,6 +34,8 @@ class Gaussian(likelihood):
self._variance = np.asarray(variance) + 1. self._variance = np.asarray(variance) + 1.
self._set_params(np.asarray(variance)) self._set_params(np.asarray(variance))
super(Gaussian, self).__init__()
def set_data(self, data): def set_data(self, data):
self.data = data self.data = data
self.N, D = data.shape self.N, D = data.shape
@ -88,11 +90,5 @@ class Gaussian(likelihood):
_95pc = mean + 2.*np.sqrt(true_var) _95pc = mean + 2.*np.sqrt(true_var)
return mean, true_var, _5pc, _95pc return mean, true_var, _5pc, _95pc
def fit_full(self):
"""
No approximations needed
"""
pass
def _gradients(self, partial): def _gradients(self, partial):
return np.sum(partial) return np.sum(partial)

View file

@ -0,0 +1,108 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
from . import Gaussian
class Gaussian_Mixed_Noise(likelihood):
    """
    Gaussian Likelihood for multiple outputs

    This is a wrapper around likelihood.Gaussian class: one Gaussian noise
    model is built per output and the per-output data are stacked vertically.

    :param data_list: data observations
    :type data_list: list of numpy arrays (num_data_output_i x 1), one array per output
    :param noise_params: noise parameters of each output (1. for every output when None)
    :type noise_params: list of floats, one per output
    :param normalize: passed on to each per-output Gaussian noise model; the stacked targets kept by this wrapper use zero offset and unit scale
    :type normalize: False|True
    """
    def __init__(self, data_list, noise_params=None, normalize=True):
        self.Nparams = len(data_list)
        self.n_list = [data.size for data in data_list]
        # Column vector giving, for every stacked row, the index of the output it came from
        self.index = np.vstack([np.repeat(i, n)[:, None] for i, n in enumerate(self.n_list)])

        if noise_params is None:
            noise_params = [1.] * self.Nparams
        else:
            assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.'

        self.noise_model_list = [Gaussian(Y, variance=v, normalize=normalize) for Y, v in zip(data_list, noise_params)]
        self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
        self.data = np.vstack(data_list)
        self.N, self.output_dim = self.data.shape
        self._offset = np.zeros((1, self.output_dim))
        self._scale = np.ones((1, self.output_dim))

        self.is_heteroscedastic = True
        self.Z = 0.  # a correction factor which accounts for the approximation made

        # set_data must run before _set_params: the latter reads self.Y and self.YYT_factor
        self.set_data(data_list)
        self._set_params(np.asarray(noise_params))
        super(Gaussian_Mixed_Noise, self).__init__()

    def set_data(self, data_list):
        """
        Stack the per-output observations and refresh the cached target statistics

        :param data_list: list of numpy arrays, one per output
        :raises NotImplementedError: if there are more output columns than stacked rows
        """
        self.data = np.vstack(data_list)
        self.N, D = self.data.shape
        assert D == self.output_dim
        self.Y = (self.data - self._offset) / self._scale
        if D > self.N:
            # The D > N case (which would cache YYT and its Cholesky factor) is not supported
            raise NotImplementedError
        self.YYT = None
        self.trYYT = np.sum(np.square(self.Y))
        self.YYT_factor = self.Y

    def predictive_values(self, mu, var, full_cov, noise_model):
        """
        Predicts the output given the GP

        :param mu: GP's mean
        :param var: GP's variance
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: False|True
        :param noise_model: noise model to use
        :type noise_model: integer
        :raises NotImplementedError: if full_cov is requested
        """
        if full_cov:
            raise NotImplementedError("Cannot make correlated predictions with a Gaussian_Mixed_Noise likelihood")
        return self.noise_model_list[noise_model].predictive_values(mu, var, full_cov)

    def _get_params(self):
        """Return the parameters of every noise model concatenated into one flat array"""
        return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])

    def _get_param_names(self):
        """
        Return one name per parameter

        With a single noise model its names are returned unchanged; otherwise
        each name is suffixed with '_<output index>'.
        """
        if len(self.noise_model_list) == 1:
            return self.noise_model_list[0]._get_param_names()
        names = []
        for i, noise_model in enumerate(self.noise_model_list):
            # Suffix every name individually so the number of names matches the number of parameters
            names.extend('%s_%s' % (name, i) for name in noise_model._get_param_names())
        return names

    def _set_params(self, p):
        """
        Distribute the flat parameter vector p over the noise models and
        recompute the quantities derived from the precisions
        """
        bounds = np.cumsum([0] + self.n_params)
        for noise_model, start, stop in zip(self.noise_model_list, bounds[:-1], bounds[1:]):
            noise_model._set_params(p[start:stop])
        # Repeat each output's precision once per observation of that output -> (N x 1) column
        self.precision = np.hstack([np.repeat(noise_model.precision, n) for noise_model, n in zip(self.noise_model_list, self.n_list)])[:, None]

        self.V = self.precision * self.Y
        self.VVT_factor = self.precision * self.YYT_factor
        self.covariance_matrix = np.eye(self.N) * 1./self.precision

    def _gradients(self, partial):
        """
        Gradients with respect to the noise parameters

        partial is sliced per output (using the per-output data counts) and each
        slice is handed to the corresponding noise model.
        """
        bounds = np.cumsum([0] + self.n_list)
        return np.hstack([noise_model._gradients(partial[start:stop])
                          for start, stop, noise_model in zip(bounds[:-1], bounds[1:], self.noise_model_list)])

View file

@ -13,7 +13,7 @@ from functools import partial
class Laplace(likelihood): class Laplace(likelihood):
"""Laplace approximation to a posterior""" """Laplace approximation to a posterior"""
def __init__(self, data, likelihood_function, extra_data=None, opt='rasm'): def __init__(self, data, noise_model, extra_data=None, opt='rasm'):
""" """
Laplace Approximation Laplace Approximation
@ -28,13 +28,13 @@ class Laplace(likelihood):
--------- ---------
:data: array of data the likelihood function is approximating :data: array of data the likelihood function is approximating
:likelihood_function: likelihood function - subclass of likelihood_function :noise_model: likelihood function - subclass of noise_model
:extra_data: additional data used by some likelihood functions, for example survival likelihoods need censoring data :extra_data: additional data used by some likelihood functions, for example survival likelihoods need censoring data
:opt: Optimiser to use, rasm numerically stable, ncg or nelder-mead (latter only work with 1d data) :opt: Optimiser to use, rasm numerically stable, ncg or nelder-mead (latter only work with 1d data)
""" """
self.data = data self.data = data
self.likelihood_function = likelihood_function self.noise_model = noise_model
self.extra_data = extra_data self.extra_data = extra_data
self.opt = opt self.opt = opt
@ -45,6 +45,7 @@ class Laplace(likelihood):
self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi)) self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))
self.restart() self.restart()
likelihood.__init__(self)
def restart(self): def restart(self):
#Initial values for the GP variables #Initial values for the GP variables
@ -59,49 +60,55 @@ class Laplace(likelihood):
def predictive_values(self, mu, var, full_cov): def predictive_values(self, mu, var, full_cov):
if full_cov: if full_cov:
raise NotImplementedError("Cannot make correlated predictions with an Laplace likelihood") raise NotImplementedError("Cannot make correlated predictions with an Laplace likelihood")
return self.likelihood_function.predictive_values(mu, var) return self.noise_model.predictive_values(mu, var)
def _get_params(self): def _get_params(self):
return np.asarray(self.likelihood_function._get_params()) return np.asarray(self.noise_model._get_params())
def _get_param_names(self): def _get_param_names(self):
return self.likelihood_function._get_param_names() return self.noise_model._get_param_names()
def _set_params(self, p): def _set_params(self, p):
return self.likelihood_function._set_params(p) return self.noise_model._set_params(p)
def _shared_gradients_components(self): def _shared_gradients_components(self):
d3lik_d3fhat = self.likelihood_function.d3lik_d3f(self.data, self.f_hat, extra_data=self.extra_data) d3lik_d3fhat = self.noise_model.d3lik_d3f(self.data, self.f_hat, extra_data=self.extra_data)
dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5? dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i) I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
return dL_dfhat, I_KW_i return dL_dfhat, I_KW_i
def _Kgradients(self, dK_dthetaK, X): def _Kgradients(self):
""" """
Gradients with respect to prior kernel parameters Gradients with respect to prior kernel parameters
""" """
dL_dfhat, I_KW_i = self._shared_gradients_components() dL_dfhat, I_KW_i = self._shared_gradients_components()
dlp = self.likelihood_function.dlik_df(self.data, self.f_hat) dlp = self.noise_model.dlik_df(self.data, self.f_hat)
#Explicit #Explicit
expl_a = np.dot(self.Ki_f, self.Ki_f.T) #expl_a = np.dot(self.Ki_f, self.Ki_f.T)
expl_b = self.Wi_K_i #expl_b = self.Wi_K_i
expl = 0.5*expl_a - 0.5*expl_b #expl = 0.5*expl_a - 0.5*expl_b
dL_dthetaK_exp = dK_dthetaK(expl, X) #dL_dthetaK_exp = dK_dthetaK(expl, X)
#Implicit #Implicit
impl = mdot(dlp, dL_dfhat, I_KW_i) impl = mdot(dlp, dL_dfhat, I_KW_i)
dL_dthetaK_imp = dK_dthetaK(impl, X)
#print "K: dL_dthetaK_exp: {} dL_dthetaK_implicit: {}".format(dL_dthetaK_exp, dL_dthetaK_imp) #No longer required as we are computing these in the gp already otherwise we would take them away and add them back
dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp #dL_dthetaK_imp = dK_dthetaK(impl, X)
return dL_dthetaK #dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
#dL_dK = expl + impl
#No need to compute explicit as we are computing dZ_dK to account for the difference
#Between the K gradients of a normal GP, and the K gradients including the implicit part
dL_dK = impl
return dL_dK
def _gradients(self, partial): def _gradients(self, partial):
""" """
Gradients with respect to likelihood parameters Gradients with respect to likelihood parameters
""" """
dL_dfhat, I_KW_i = self._shared_gradients_components() dL_dfhat, I_KW_i = self._shared_gradients_components()
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.likelihood_function._gradients(self.data, self.f_hat) dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.data, self.f_hat)
num_params = len(dlik_dthetaL) num_params = len(dlik_dthetaL)
# make space for one derivative for each likelihood parameter # make space for one derivative for each likelihood parameter
@ -163,12 +170,13 @@ class Laplace(likelihood):
Y_tilde = Wi*self.Ki_f + self.f_hat Y_tilde = Wi*self.Ki_f + self.f_hat
#self.Wi_K_i = self.W_12*self.Bi*self.W_12.T #same as rasms R #self.Wi_K_i = self.W_12*self.Bi*self.W_12.T #same as rasms R
self.Wi_K_i = self.W_12*cho_solve((self.B_chol, True), np.diagflat(self.W_12)) #self.Wi_K_i = self.W_12*cho_solve((self.B_chol, True), np.diagflat(self.W_12))
self.Wi_K_i = self.W12BiW12
#self.Wi_K_i, _, _, self.ln_det_Wi_K = pdinv(self.Sigma_tilde + self.K) # TODO: Check if Wi_K_i == R above and same with det below #self.Wi_K_i, _, _, self.ln_det_Wi_K = pdinv(self.Sigma_tilde + self.K) # TODO: Check if Wi_K_i == R above and same with det below
self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K) self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
self.lik = self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data) self.lik = self.noise_model.link_function(self.data, self.f_hat, extra_data=self.extra_data)
self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde) self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
Z_tilde = (+ self.lik Z_tilde = (+ self.lik
@ -185,6 +193,10 @@ class Laplace(likelihood):
self.covariance_matrix = self.Sigma_tilde self.covariance_matrix = self.Sigma_tilde
self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None] self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]
#Compute dZ_dK which is how the approximated distributions gradients differ from the dL_dK computed for other likelihoods
self.dZ_dK = self._Kgradients()
#+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the K gradients explicit part theres no need to compute this again
def fit_full(self, K): def fit_full(self, K):
""" """
The laplace approximation algorithm, find K and expand hessian The laplace approximation algorithm, find K and expand hessian
@ -205,27 +217,19 @@ class Laplace(likelihood):
def _compute_likelihood_variables(self): def _compute_likelihood_variables(self):
#At this point get the hessian matrix (or vector as W is diagonal) #At this point get the hessian matrix (or vector as W is diagonal)
self.W = -self.likelihood_function.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data) self.W = -self.noise_model.d2lik_d2f(self.data, self.f_hat, extra_data=self.extra_data)
if not self.likelihood_function.log_concave:
#print "Under 1e-6: {}".format(np.sum(self.W < 1e-6))
self.W[self.W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
#If the likelihood is non-log-concave. We wan't to say that there is a negative variance
#To cause the posterior to become less certain than the prior and likelihood,
#This is a property only held by non-log-concave likelihoods
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though #TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
self.B, self.B_chol, self.W_12 = self._compute_B_statistics(self.K, self.W) self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
self.Bi, _, _, self.ln_B_det = pdinv(self.B)
#Do the computation again at f to get Ki_f which is useful #Do the computation again at f to get Ki_f which is useful
#b = self.W*self.f_hat + self.likelihood_function.dlik_df(self.data, self.f_hat, extra_data=self.extra_data) #b = self.W*self.f_hat + self.noise_model.dlik_df(self.data, self.f_hat, extra_data=self.extra_data)
#solve_chol = cho_solve((self.B_chol, True), np.dot(self.W_12*self.K, b)) #solve_chol = cho_solve((self.B_chol, True), np.dot(self.W_12*self.K, b))
#a = b - self.W_12*solve_chol #a = b - self.W_12*solve_chol
self.Ki_f = self.a self.Ki_f = self.a
self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f) self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
self.Ki_W_i = self.K - mdot(self.K, self.W_12*cho_solve((self.B_chol, True), np.diagflat(self.W_12)), self.K) self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)
#For det, |I + KW| == |I + W_12*K*W_12| #For det, |I + KW| == |I + W_12*K*W_12|
#self.ln_I_KW_det = pddet(np.eye(self.N) + self.W_12*self.K*self.W_12.T) #self.ln_I_KW_det = pddet(np.eye(self.N) + self.W_12*self.K*self.W_12.T)
@ -233,12 +237,12 @@ class Laplace(likelihood):
#self.ln_I_KW_det = pddet(np.eye(self.N) + np.dot(self.K, self.W)) #self.ln_I_KW_det = pddet(np.eye(self.N) + np.dot(self.K, self.W))
#self.ln_z_hat = (- 0.5*self.f_Ki_f #self.ln_z_hat = (- 0.5*self.f_Ki_f
#- self.ln_I_KW_det #- self.ln_I_KW_det
#+ self.likelihood_function.link_function(self.data, self.f_hat, extra_data=self.extra_data) #+ self.noise_model.link_function(self.data, self.f_hat, extra_data=self.extra_data)
#) #)
return self._compute_GP_variables() return self._compute_GP_variables()
def _compute_B_statistics(self, K, W): def _compute_B_statistics(self, K, W, a):
"""Rasmussen suggests the use of a numerically stable positive definite matrix B """Rasmussen suggests the use of a numerically stable positive definite matrix B
Which has a positive diagonal element and can be easyily inverted Which has a positive diagonal element and can be easyily inverted
@ -246,17 +250,28 @@ class Laplace(likelihood):
:W: Negative hessian at a point (diagonal matrix) :W: Negative hessian at a point (diagonal matrix)
:returns: (B, L) :returns: (B, L)
""" """
if not self.noise_model.log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-10))
W[W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
# To cause the posterior to become less certain than the prior and likelihood,
# This is a property only held by non-log-concave likelihoods
#W is diagonal so its sqrt is just the sqrt of the diagonal elements #W is diagonal so its sqrt is just the sqrt of the diagonal elements
W_12 = np.sqrt(W) W_12 = np.sqrt(W)
B = np.eye(self.N) + W_12*K*W_12.T B = np.eye(self.N) + W_12*K*W_12.T
L = jitchol(B) L = jitchol(B)
return (B, L, W_12)
W12BiW12= W_12*cho_solve((L, True), W_12*a)
ln_B_det = 2*np.sum(np.log(np.diag(L)))
return (W12BiW12, ln_B_det)
def nelder_mode(self, K): def nelder_mode(self, K):
f = np.zeros((self.N, 1)) f = np.zeros((self.N, 1))
self.Ki, _, _, self.ln_K_det = pdinv(K) self.Ki, _, _, self.ln_K_det = pdinv(K)
def obj(f): def obj(f):
res = -1 * (self.likelihood_function.link_function(self.data[:, 0], f, extra_data=self.extra_data) - 0.5*np.dot(f.T, np.dot(self.Ki, f))) res = -1 * (self.noise_model.link_function(self.data[:, 0], f, extra_data=self.extra_data) - 0.5*np.dot(f.T, np.dot(self.Ki, f)))
return float(res) return float(res)
res = sp.optimize.minimize(obj, f, method='nelder-mead', options={'xtol': 1e-7, 'maxiter': 25000, 'disp': True}) res = sp.optimize.minimize(obj, f, method='nelder-mead', options={'xtol': 1e-7, 'maxiter': 25000, 'disp': True})
@ -276,16 +291,16 @@ class Laplace(likelihood):
#FIXME: Can we get rid of this horrible reshaping? #FIXME: Can we get rid of this horrible reshaping?
#ONLY WORKS FOR 1D DATA #ONLY WORKS FOR 1D DATA
def obj(f): def obj(f):
res = -1 * (self.likelihood_function.link_function(self.data[:, 0], f, extra_data=self.extra_data) - 0.5 * np.dot(f.T, np.dot(self.Ki, f)) res = -1 * (self.noise_model.link_function(self.data[:, 0], f, extra_data=self.extra_data) - 0.5 * np.dot(f.T, np.dot(self.Ki, f))
- self.NORMAL_CONST) - self.NORMAL_CONST)
return float(res) return float(res)
def obj_grad(f): def obj_grad(f):
res = -1 * (self.likelihood_function.dlik_df(self.data[:, 0], f, extra_data=self.extra_data) - np.dot(self.Ki, f)) res = -1 * (self.noise_model.dlik_df(self.data[:, 0], f, extra_data=self.extra_data) - np.dot(self.Ki, f))
return np.squeeze(res) return np.squeeze(res)
def obj_hess(f): def obj_hess(f):
res = -1 * (np.diag(self.likelihood_function.d2lik_d2f(self.data[:, 0], f, extra_data=self.extra_data)) - self.Ki) res = -1 * (np.diag(self.noise_model.d2lik_d2f(self.data[:, 0], f, extra_data=self.extra_data)) - self.Ki)
return np.squeeze(res) return np.squeeze(res)
f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess, disp=False) f_hat = sp.optimize.fmin_ncg(obj, f, fprime=obj_grad, fhess=obj_hess, disp=False)
@ -301,7 +316,7 @@ class Laplace(likelihood):
:MAX_RESTART: Maximum number of restarts (reducing step_size) before forcing finish of optimisation :MAX_RESTART: Maximum number of restarts (reducing step_size) before forcing finish of optimisation
:returns: f_mode :returns: f_mode
""" """
#self.old_before_s = self.likelihood_function._get_params() #self.old_before_s = self.noise_model._get_params()
#print "before: ", self.old_before_s #print "before: ", self.old_before_s
#if self.old_before_s < 1e-5: #if self.old_before_s < 1e-5:
@ -317,7 +332,7 @@ class Laplace(likelihood):
old_obj = np.inf old_obj = np.inf
def obj(a, f): def obj(a, f):
return -0.5*np.dot(a.T, f) + self.likelihood_function.link_function(self.data, f, extra_data=self.extra_data) return -0.5*np.dot(a.T, f) + self.noise_model.link_function(self.data, f, extra_data=self.extra_data)
difference = np.inf difference = np.inf
epsilon = 1e-6 epsilon = 1e-6
@ -326,23 +341,17 @@ class Laplace(likelihood):
i = 0 i = 0
while difference > epsilon and i < MAX_ITER:# and rs < MAX_RESTART: while difference > epsilon and i < MAX_ITER:# and rs < MAX_RESTART:
W = -self.likelihood_function.d2lik_d2f(self.data, f, extra_data=self.extra_data) W = -self.noise_model.d2lik_d2f(self.data, f, extra_data=self.extra_data)
#W = np.maximum(W, 0)
if not self.likelihood_function.log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-10))
W[W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
# To cause the posterior to become less certain than the prior and likelihood,
# This is a property only held by non-log-concave likelihoods
B, L, W_12 = self._compute_B_statistics(K, W.copy())
W_f = W*f W_f = W*f
grad = self.likelihood_function.dlik_df(self.data, f, extra_data=self.extra_data) grad = self.noise_model.dlik_df(self.data, f, extra_data=self.extra_data)
b = W_f + grad b = W_f + grad
solve_L = cho_solve((L, True), W_12*np.dot(K, b)) #TODO!!!
W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))
#solve_L = cho_solve((L, True), W_12*np.dot(K, b))
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
full_step_a = b - W_12*solve_L full_step_a = b - W12BiW12Kb
da = full_step_a - old_a da = full_step_a - old_a
f_old = f.copy() f_old = f.copy()
@ -355,7 +364,7 @@ class Laplace(likelihood):
i_o = partial(inner_obj, old_a=old_a, da=da, K=K) i_o = partial(inner_obj, old_a=old_a, da=da, K=K)
#new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=20) #new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=20)
new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':20}).fun new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':30}).fun
f = self.f.copy() f = self.f.copy()
a = self.a.copy() a = self.a.copy()
@ -435,13 +444,13 @@ class Laplace(likelihood):
pb.close('all') pb.close('all')
#FIXME: DELETE THESE #FIXME: DELETE THESE
self.old_W = W.copy() #self.old_W = W.copy()
self.old_grad = grad.copy() #self.old_grad = grad.copy()
self.old_B = B.copy() #self.old_B = B.copy()
self.old_W_12 = W_12.copy() #self.old_W_12 = W_12.copy()
self.old_ff = f.copy() #self.old_ff = f.copy()
self.old_K = self.K.copy() #self.old_K = self.K.copy()
self.old_s = self.likelihood_function._get_params() #self.old_s = self.noise_model._get_params()
#print "after: ", self.old_s #print "after: ", self.old_s
#print "FINAL a max: {} a min: {} a var: {}".format(np.max(self.a), np.min(self.a), np.var(self.a)) #print "FINAL a max: {} a min: {} a var: {}".format(np.max(self.a), np.min(self.a), np.var(self.a))
self.a = a self.a = a

View file

@ -1,7 +1,8 @@
import numpy as np import numpy as np
import copy import copy
from ..core.parameterized import Parameterized
class likelihood: class likelihood(Parameterized):
""" """
The atom for a likelihood class The atom for a likelihood class
@ -9,17 +10,20 @@ class likelihood:
(Gaussian) inherits directly from this, as does the EP algorithm (Gaussian) inherits directly from this, as does the EP algorithm
Some things must be defined for this to work properly: Some things must be defined for this to work properly:
self.Y : the effective Gaussian target of the GP
self.N, self.D : Y.shape - self.Y : the effective Gaussian target of the GP
self.covariance_matrix : the effective (noise) covariance of the GP targets - self.N, self.D : Y.shape
self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP) - self.covariance_matrix : the effective (noise) covariance of the GP targets
self.is_heteroscedastic : enables significant computational savings in GP - self.Z : a factor which gets added to the likelihood (0 for a Gaussian, Z_EP for EP)
self.precision : a scalar or vector representation of the effective target precision - self.is_heteroscedastic : enables significant computational savings in GP
self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N - self.precision : a scalar or vector representation of the effective target precision
self.V : self.precision * self.Y - self.YYT : (optional) = np.dot(self.Y, self.Y.T) enables computational savings for D>N
- self.V : self.precision * self.Y
""" """
def __init__(self,data): def __init__(self):
raise ValueError, "this class is not to be instantiated" Parameterized.__init__(self)
self.dZ_dK = 0
def _get_params(self): def _get_params(self):
raise NotImplementedError raise NotImplementedError
@ -33,12 +37,20 @@ class likelihood:
def fit(self): def fit(self):
raise NotImplementedError raise NotImplementedError
def fit_full(self, K):
"""
No approximations needed by default
"""
pass
def restart(self):
"""
No need to restart if not an approximation
"""
pass
def _gradients(self, partial): def _gradients(self, partial):
raise NotImplementedError raise NotImplementedError
def predictive_values(self, mu, var): def predictive_values(self, mu, var):
raise NotImplementedError raise NotImplementedError
def copy(self):
""" Returns a (deep) copy of the current likelihood """
return copy.deepcopy(self)

View file

@ -1,46 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
class LinkFunction(object):
    """
    Link function class for doing non-Gaussian likelihoods approximation

    Base class only: the constructor takes no arguments and stores no state.
    """
    def __init__(self):
        """Nothing to initialise."""
class Probit(LinkFunction):
    """
    Probit link function: Squashes a likelihood between 0 and 1

    Every method below is an unimplemented stub that returns None.
    """
    def transf(self, mu):
        """Stub: not implemented, returns None."""
        return None

    def inv_transf(self, f):
        """Stub: not implemented, returns None."""
        return None

    def log_inv_transf(self, f):
        """Stub: not implemented, returns None."""
        return None
class Nothing(LinkFunction):
    """
    Pass-through link function: transf and inv_transf return their argument unchanged
    """
    def transf(self, mu):
        """Return mu unchanged."""
        unchanged = mu
        return unchanged

    def inv_transf(self, f):
        """Return f unchanged."""
        unchanged = f
        return unchanged

    def log_inv_transf(self, f):
        """Return the natural logarithm of f."""
        log_f = np.log(f)
        return log_f

View file

@ -0,0 +1,115 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import noise_models
def binomial(gp_link=None):
    """
    Construct a binomial likelihood

    :param gp_link: a GPy gp_link function (a Probit transformation is created when None)
    """
    transformations = noise_models.gp_transformations
    if gp_link is None:
        gp_link = transformations.Probit()

    # Which predictive moments are flagged as analytical depends on the link type
    if isinstance(gp_link, transformations.Probit):
        analytical_mean, analytical_variance = True, False
    elif isinstance(gp_link, transformations.Heaviside):
        analytical_mean, analytical_variance = True, True
    else:
        analytical_mean, analytical_variance = False, False

    return noise_models.binomial_noise.Binomial(gp_link, analytical_mean, analytical_variance)
def exponential(gp_link=None):
    """
    Construct an exponential likelihood

    :param gp_link: a GPy gp_link function (an Identity transformation is created when None)
    """
    link = noise_models.gp_transformations.Identity() if gp_link is None else gp_link
    # Neither predictive moment is flagged as analytical
    return noise_models.exponential_noise.Exponential(link, False, False)
def gaussian_ep(gp_link=None,variance=1.):
    """
    Construct a gaussian likelihood

    :param gp_link: a GPy gp_link function (an Identity transformation is created when None)
    :param variance: scalar
    """
    link = noise_models.gp_transformations.Identity() if gp_link is None else gp_link
    # Neither predictive moment is flagged as analytical
    return noise_models.gaussian_noise.Gaussian(link, False, False, variance)
def poisson(gp_link=None):
    """
    Construct a Poisson likelihood

    :param gp_link: a GPy gp_link function (a Log_ex_1 transformation is created when None)
    """
    link = noise_models.gp_transformations.Log_ex_1() if gp_link is None else gp_link
    # Neither predictive moment is flagged as analytical
    return noise_models.poisson_noise.Poisson(link, False, False)
def gamma(gp_link=None,beta=1.):
    """
    Construct a Gamma likelihood

    :param gp_link: a GPy gp_link function (a Log_ex_1 transformation is created when None)
    :param beta: scalar
    """
    link = noise_models.gp_transformations.Log_ex_1() if gp_link is None else gp_link
    # Neither predictive moment is flagged as analytical
    return noise_models.gamma_noise.Gamma(link, False, False, beta)
def gaussian(gp_link=None, variance=2, D=None, N=None):
    """
    Construct a Gaussian likelihood

    :param gp_link: a GPy gp_link function (an Identity transformation is created when None)
    :param variance: scalar, variance
    :param D: forwarded unchanged to the Gaussian noise model
    :param N: forwarded unchanged to the Gaussian noise model
    """
    link = noise_models.gp_transformations.Identity() if gp_link is None else gp_link
    # Both moments are flagged as analytical
    # NOTE(review): the original author marked the variance flag with a '?' - confirm
    return noise_models.gaussian_noise.Gaussian(link, True, True,
                                                variance=variance, D=D, N=N)
def student_t(gp_link=None, deg_free=5, sigma2=2):
    """
    Construct a Student t likelihood

    :param gp_link: a GPy gp_link function (an Identity transformation is created when None)
    :param deg_free: scalar, degrees of freedom
    :param sigma2: scalar, variance
    """
    link = noise_models.gp_transformations.Identity() if gp_link is None else gp_link
    # Both predictive moments are flagged as analytical
    return noise_models.student_t_noise.StudentT(link, True, True, deg_free, sigma2)

View file

@ -0,0 +1,8 @@
import noise_distributions
import binomial_noise
import exponential_noise
import gaussian_noise
import gamma_noise
import poisson_noise
import student_t_noise
import gp_transformations

View file

@ -0,0 +1,119 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Binomial(NoiseDistribution):
    """
    Binomial likelihood (the probit likelihood when used with a Probit link)

    Observations are supplied in {0,1}; _preprocess_values maps them to {-1,1}.
    -----
    $$
    L(x) = \\Phi (Y_i*f_i)
    $$
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
        super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y):
        """
        Check if the values of the observations correspond to the values
        assumed by the likelihood function.

        Returns a copy of Y in which every 0 has been replaced by -1.

        ..Note:: Binary classification algorithm works better with classes {-1,1}
        """
        flat = Y.flatten()
        Y_prep = Y.copy()
        Y1 = Y[flat == 1].size
        Y2 = Y[flat == 0].size
        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
        Y_prep[flat == 0] = -1
        return Y_prep

    def _moments_match_analytical(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param data_i: observed value of the data point being matched
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        :returns: (Z_hat, mu_hat, sigma2_hat)
        :raises NotImplementedError: if the link is neither Probit nor Heaviside
        :raises ValueError: if the Heaviside moments contain NaN
        """
        if isinstance(self.gp_link,gp_transformations.Probit):
            z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
            Z_hat = std_norm_cdf(z)
            phi = std_norm_pdf(z)
            mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
            sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
        elif isinstance(self.gp_link,gp_transformations.Heaviside):
            a = data_i*v_i/np.sqrt(tau_i)
            Z_hat = std_norm_cdf(a)
            N = std_norm_pdf(a)
            mu_hat = v_i/tau_i + data_i*N/Z_hat/np.sqrt(tau_i)
            sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
            # The original trapped NaNs by evaluating an undefined name; fail explicitly instead
            if np.any(np.isnan([Z_hat, mu_hat, sigma2_hat])):
                raise ValueError("Moment matching produced NaN values")
        else:
            # Without this branch an unsupported link fell through to an UnboundLocalError
            raise NotImplementedError("Analytical moment matching is only available for Probit and Heaviside links")
        return Z_hat, mu_hat, sigma2_hat

    def _predictive_mean_analytical(self,mu,sigma):
        """
        Closed-form predictive mean for Probit and Heaviside links

        :raises NotImplementedError: for any other link
        """
        if isinstance(self.gp_link,gp_transformations.Probit):
            return stats.norm.cdf(mu/np.sqrt(1+sigma**2))
        elif isinstance(self.gp_link,gp_transformations.Heaviside):
            return stats.norm.cdf(mu/sigma)
        else:
            raise NotImplementedError

    def _predictive_variance_analytical(self,mu,sigma, pred_mean):
        """
        Closed-form predictive variance; returns 0. for the Heaviside link

        :raises NotImplementedError: for any other link
        """
        if isinstance(self.gp_link,gp_transformations.Heaviside):
            return 0.
        else:
            raise NotImplementedError

    def _mass(self,gp,obs):
        """
        Mass function p**obs * (1-p)**(1-obs), with p the transformed gp
        """
        #NOTE obs must be in {0,1}
        p = self.gp_link.transf(gp)
        return p**obs * (1.-p)**(1.-obs)

    def _nlog_mass(self,gp,obs):
        """
        Returns obs*log(p) + (1-obs)*log(1-p), with p the transformed gp
        """
        # NOTE(review): despite the name, no negation is applied here - confirm the
        # sign convention expected by the callers of the _nlog_* methods
        p = self.gp_link.transf(gp)
        return obs*np.log(p) + (1.-obs)*np.log(1-p)

    def _dnlog_mass_dgp(self,gp,obs):
        """
        Derivative of _nlog_mass with respect to gp
        """
        p = self.gp_link.transf(gp)
        dp = self.gp_link.dtransf_df(gp)
        return obs/p * dp - (1.-obs)/(1.-p) * dp

    def _d2nlog_mass_dgp2(self,gp,obs):
        """
        Second derivative of _nlog_mass with respect to gp
        """
        # Obtained by differentiating _dnlog_mass_dgp once more (product and chain rule);
        # the original expression had mismatched signs and used dtransf_df unsquared
        p = self.gp_link.transf(gp)
        dp = self.gp_link.dtransf_df(gp)
        d2p = self.gp_link.d2transf_df2(gp)
        return (obs/p - (1.-obs)/(1.-p))*d2p - (obs/p**2 + (1.-obs)/(1.-p)**2)*dp**2

    def _mean(self,gp):
        """
        Mean of the distribution: the transformed gp
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        """First derivative of _mean with respect to gp"""
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        """Second derivative of _mean with respect to gp"""
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the distribution: p*(1-p), with p the transformed gp
        """
        p = self.gp_link.transf(gp)
        return p*(1.-p)

    def _dvariance_dgp(self,gp):
        """First derivative of _variance with respect to gp"""
        return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))

    def _d2variance_dgp2(self,gp):
        """Second derivative of _variance with respect to gp"""
        return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2

View file

@ -0,0 +1,68 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Exponential(NoiseDistribution):
    r"""
    Exponential likelihood

    The density implemented by _mass is
    -----
    $$
    p(Y_i | f_i) = \exp(-Y_i / \lambda_i) / \lambda_i
    $$
    where $\lambda_i$ is the transformed gp, which is also returned as the mean.
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y):
        """Observations are used as given"""
        return Y

    def _mass(self,gp,obs):
        """
        Mass (or density) function
        """
        scale = self.gp_link.transf(gp)
        return np.exp(-obs/scale)/scale

    def _nlog_mass(self,gp,obs):
        """
        Negative logarithm of the density: obs/scale + log(scale)
        """
        scale = self.gp_link.transf(gp)
        return obs/scale + np.log(scale)

    def _dnlog_mass_dgp(self,gp,obs):
        """First derivative of _nlog_mass with respect to gp"""
        scale = self.gp_link.transf(gp)
        slope = self.gp_link.dtransf_df(gp)
        return ( 1./scale - obs/scale**2) * slope

    def _d2nlog_mass_dgp2(self,gp,obs):
        """Second derivative of _nlog_mass with respect to gp"""
        link = self.gp_link
        scale = link.transf(gp)
        slope = link.dtransf_df(gp)
        curvature = link.d2transf_df2(gp)
        return (2*obs/scale**3 - 1./scale**2) * slope**2 + ( 1./scale - obs/scale**2) * curvature

    def _mean(self,gp):
        """
        Mean of the distribution: the transformed gp
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        """First derivative of _mean with respect to gp"""
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        """Second derivative of _mean with respect to gp"""
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the distribution: the squared transformed gp
        """
        scale = self.gp_link.transf(gp)
        return scale**2

    def _dvariance_dgp(self,gp):
        """First derivative of _variance with respect to gp"""
        scale = self.gp_link.transf(gp)
        slope = self.gp_link.dtransf_df(gp)
        return 2*scale*slope

    def _d2variance_dgp2(self,gp):
        """Second derivative of _variance with respect to gp"""
        link = self.gp_link
        slope = link.dtransf_df(gp)
        scale = link.transf(gp)
        curvature = link.d2transf_df2(gp)
        return 2 * (slope**2 + scale*curvature)

View file

@ -0,0 +1,71 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Gamma(NoiseDistribution):
    """
    Gamma likelihood with shape alpha = gp_link.transf(gp)*beta and rate beta.

    The density evaluated by _mass is

    .. math::
        p(y|\\alpha,\\beta) = \\frac{\\beta^{\\alpha}}{\\Gamma(\\alpha)} y^{\\alpha - 1} \\exp(-\\beta y)

    so the conditional mean is gp_link.transf(gp) and the conditional
    variance is gp_link.transf(gp)/beta.

    :param beta: rate parameter of the distribution (kept fixed)
    """

    def __init__(self, gp_link=None, analytical_mean=False, analytical_variance=False, beta=1.):
        self.beta = beta
        super(Gamma, self).__init__(gp_link, analytical_mean, analytical_variance)

    def _preprocess_values(self, Y):
        """Return the observations unchanged."""
        return Y

    def _mass(self, gp, obs):
        """
        Density of obs given the latent value gp.
        """
        alpha = self.gp_link.transf(gp)*self.beta
        return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)

    def _nlog_mass(self, gp, obs):
        """
        Negative logarithm of _mass.
        """
        alpha = self.gp_link.transf(gp)*self.beta
        # gammaln evaluates log(Gamma(alpha)) directly, so it does not overflow
        # to inf for large alpha the way log(gamma(alpha)) does.
        return (1. - alpha)*np.log(obs) + self.beta*obs - alpha*np.log(self.beta) + special.gammaln(alpha)

    def _dnlog_mass_dgp(self, gp, obs):
        """
        First derivative of _nlog_mass with respect to gp.

        d(nlog)/d(alpha) = psi(alpha) - log(obs) - log(beta), chained with
        d(alpha)/d(gp) = dtransf_df(gp)*beta.
        """
        alpha = self.gp_link.transf(gp)*self.beta
        dalpha = self.gp_link.dtransf_df(gp)*self.beta
        # The log(beta) term comes from -alpha*log(beta) in _nlog_mass; it only
        # vanishes for beta == 1.
        return dalpha*(special.psi(alpha) - np.log(obs) - np.log(self.beta))

    def _d2nlog_mass_dgp2(self, gp, obs):
        """
        Second derivative of _nlog_mass with respect to gp.
        """
        alpha = self.gp_link.transf(gp)*self.beta
        dalpha = self.gp_link.dtransf_df(gp)*self.beta
        d2alpha = self.gp_link.d2transf_df2(gp)*self.beta
        dnlog_dalpha = special.psi(alpha) - np.log(obs) - np.log(self.beta)
        return special.polygamma(1, alpha)*dalpha**2 + dnlog_dalpha*d2alpha

    def _mean(self, gp):
        """
        Conditional mean of the output given gp.
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self, gp):
        """First derivative of _mean with respect to gp."""
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self, gp):
        """Second derivative of _mean with respect to gp."""
        return self.gp_link.d2transf_df2(gp)

    def _variance(self, gp):
        """
        Conditional variance of the output given gp.
        """
        return self.gp_link.transf(gp)/self.beta

    def _dvariance_dgp(self, gp):
        """First derivative of _variance with respect to gp."""
        return self.gp_link.dtransf_df(gp)/self.beta

    def _d2variance_dgp2(self, gp):
        """Second derivative of _variance with respect to gp."""
        return self.gp_link.d2transf_df2(gp)/self.beta

View file

@ -0,0 +1,208 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Gaussian(NoiseDistribution):
    """
    Gaussian likelihood

    :param variance: variance of the Gaussian noise
    :param D: used as the multiplier of the log(2*pi) constant in link_function
    :param N: size of the N x N identity/covariance matrices cached by
        _set_params; required by the dlik_*/d2lik_*/d3lik_* methods
    """

    def __init__(self, gp_link=None, analytical_mean=False, analytical_variance=False, variance=1., D=None, N=None):
        self.variance = variance
        self.D = D
        self.N = N
        self._set_params(np.asarray(variance))
        super(Gaussian, self).__init__(gp_link, analytical_mean, analytical_variance)

    def _get_params(self):
        return np.array([self.variance])

    def _get_param_names(self):
        return ['noise_model_variance']

    def _set_params(self, p):
        """
        Set the noise variance and refresh the cached N x N matrices.
        """
        self.variance = float(p)
        if self.N is None:
            # np.eye(None) raises, which made the default N=None unusable.
            # Without N the caches cannot be built, so leave them unset; the
            # methods that need them still require N to be given.
            self.I = None
            self.covariance_matrix = None
            self.Ki = None
            self.ln_det_K = None
            return
        self.I = np.eye(self.N)
        self.covariance_matrix = self.I * self.variance
        self.Ki = self.I*(1.0 / self.variance)
        self.ln_det_K = np.sum(np.log(np.diag(self.covariance_matrix)))

    def _laplace_gradients(self, y, f, extra_data=None):
        """
        Derivatives w.r.t. the noise variance of the log-likelihood, of its
        gradient in f and of its Hessian diagonal in f.

        :returns: tuple of three lists, each with one entry per parameter
        """
        #must be listed in same order as 'get_param_names'
        derivs = ([self.dlik_dvar(y, f, extra_data=extra_data)],
                  [self.dlik_df_dvar(y, f, extra_data=extra_data)],
                  [self.d2lik_d2f_dvar(y, f, extra_data=extra_data)]
                  ) # lists as we might learn many parameters
        # ensure we have gradients for every parameter we want to optimize
        assert len(derivs[0]) == len(self._get_param_names())
        assert len(derivs[1]) == len(self._get_param_names())
        assert len(derivs[2]) == len(self._get_param_names())
        return derivs

    def _gradients(self, partial):
        # NOTE(review): returns a zero gradient regardless of `partial`; an
        # earlier version returned np.sum(partial) -- confirm which is intended.
        return np.zeros(1)

    def _preprocess_values(self, Y):
        """
        Check if the values of the observations correspond to the values
        assumed by the likelihood function.
        """
        return Y

    def _moments_match_analytical(self, data_i, tau_i, v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param data_i: observed value
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        :returns: (Z_hat, mu_hat, sigma2_hat)
        """
        sigma2_hat = 1./(1./self.variance + tau_i)
        mu_hat = sigma2_hat*(data_i/self.variance + v_i)
        sum_var = self.variance + 1./tau_i
        Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
        return Z_hat, mu_hat, sigma2_hat

    def _predictive_mean_analytical(self, mu, sigma):
        new_sigma2 = self.predictive_variance(mu, sigma)
        return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)

    def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
        """
        :param mu: mean of the latent variable
        :param sigma: standard deviation of the latent variable
        :param predictive_mean: accepted (and ignored) so that the three-argument
            call made by predictive_values works for the analytical variance too
        """
        return 1./(1./self.variance + 1./sigma**2)

    def _mass(self, gp, obs):
        """Density of obs under a normal with mean transf(gp) and the noise variance."""
        return stats.norm.pdf(obs, self.gp_link.transf(gp), np.sqrt(self.variance))

    def _nlog_mass(self, gp, obs):
        """Negative logarithm of _mass."""
        return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance))

    def _dnlog_mass_dgp(self, gp, obs):
        """First derivative of _nlog_mass with respect to gp."""
        return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp)

    def _d2nlog_mass_dgp2(self, gp, obs):
        """Second derivative of _nlog_mass with respect to gp."""
        return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance

    def _mean(self, gp):
        """
        Conditional mean of the output given gp.
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self, gp):
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self, gp):
        return self.gp_link.d2transf_df2(gp)

    def _variance(self, gp):
        """
        Conditional variance of the output given gp (constant in gp).
        """
        return self.variance

    def _dvariance_dgp(self, gp):
        return 0

    def _d2variance_dgp2(self, gp):
        return 0

    def link_function(self, y, f, extra_data=None):
        """
        Gaussian log-likelihood ln p(y|f), summed to a scalar.

        :y: data
        :f: latent variables f
        :extra_data: not used by this likelihood
        :returns: float(likelihood evaluated for this point)
        """
        assert y.shape == f.shape
        e = y - f
        objective = (- 0.5*self.D*np.log(2*np.pi)
                     - 0.5*self.ln_det_K
                     - (0.5/self.variance)*np.dot(e.T, e) # As long as K is diagonal
                     )
        return np.sum(objective)

    def dlik_df(self, y, f, extra_data=None):
        """
        Gradient of the link function at y, given f w.r.t f

        :y: data
        :f: latent variables f
        :extra_data: not used by this likelihood
        :returns: gradient of likelihood evaluated at points
        """
        assert y.shape == f.shape
        s2_i = (1.0/self.variance)*self.I
        grad = np.dot(s2_i, y) - np.dot(s2_i, f)
        return grad

    def d2lik_d2f(self, y, f, extra_data=None):
        """
        Hessian at this point (if we are only looking at the link function not the prior) the hessian will be 0 unless i == j
        i.e. second derivative link_function at y given f f_j w.r.t f and f_j

        Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
        (the distribution for y_{i} depends only on f_{i} not on f_{j!=i}

        :y: data
        :f: latent variables f
        :extra_data: not used by this likelihood
        :returns: column vector holding the diagonal of the Hessian (-1/variance per point)
        """
        assert y.shape == f.shape
        s2_i = (1.0/self.variance)*self.I
        hess = np.diag(-s2_i)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
        return hess

    def d3lik_d3f(self, y, f, extra_data=None):
        """
        Third order derivative of the log-likelihood w.r.t f.

        The Gaussian log-likelihood is quadratic in f, so this is a column of zeros.
        """
        assert y.shape == f.shape
        d3lik_d3f = np.diagonal(0*self.I)[:, None] # FIXME: CAREFUL THIS MAY NOT WORK WITH MULTIDIMENSIONS?
        return d3lik_d3f

    def dlik_dvar(self, y, f, extra_data=None):
        """
        Gradient of the log-likelihood w.r.t the noise variance
        """
        assert y.shape == f.shape
        e = y - f
        s_4 = 1.0/(self.variance**2)
        dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.dot(e.T, e)
        return np.sum(dlik_dsigma) # Sure about this sum?

    def dlik_df_dvar(self, y, f, extra_data=None):
        """
        Gradient of dlik_df w.r.t the noise variance: -(y - f)/variance**2
        """
        assert y.shape == f.shape
        s_4 = 1.0/(self.variance**2)
        dlik_grad_dsigma = -np.dot(s_4*self.I, y) + np.dot(s_4*self.I, f)
        return dlik_grad_dsigma

    def d2lik_d2f_dvar(self, y, f, extra_data=None):
        """
        Gradient of the Hessian diagonal (d2lik_d2f) w.r.t the noise variance: 1/variance**2 per point
        """
        assert y.shape == f.shape
        dlik_hess_dsigma = np.diag((1.0/(self.variance**2))*self.I)[:, None]
        return dlik_hess_dsigma

View file

@ -0,0 +1,133 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
class GPTransformation(object):
    """
    Base class for the transformations that map a latent Gaussian process
    value into the output space of a non-Gaussian likelihood.

    The methods here are placeholders that return None; the subclasses in
    this file override all three.
    """

    def __init__(self):
        return None

    def transf(self, f):
        """
        Gaussian process transformation function, latent space -> output space
        """
        return None

    def dtransf_df(self, f):
        """
        derivative of transf(f) w.r.t. f
        """
        return None

    def d2transf_df2(self, f):
        """
        second derivative of transf(f) w.r.t. f
        """
        return None
class Identity(GPTransformation):
    """
    Identity transformation: the latent value is used as-is.

    .. math::

        g(f) = f
    """

    def transf(self, f):
        """Return f unchanged."""
        output = f
        return output

    def dtransf_df(self, f):
        """Derivative of transf: the constant 1."""
        slope = 1.
        return slope

    def d2transf_df2(self, f):
        """Second derivative of transf: the constant 0."""
        curvature = 0
        return curvature
class Probit(GPTransformation):
    """
    Probit link: transf maps the latent value through std_norm_cdf.

    .. math::

        g(f) = \\Phi^{-1} (mu)
    """

    def transf(self, f):
        """Return std_norm_cdf(f)."""
        cdf_value = std_norm_cdf(f)
        return cdf_value

    def dtransf_df(self, f):
        """Derivative of transf: std_norm_pdf(f)."""
        density = std_norm_pdf(f)
        return density

    def d2transf_df2(self, f):
        """Second derivative of transf: -f * std_norm_pdf(f)."""
        density = std_norm_pdf(f)
        return -f * density
class Log(GPTransformation):
    """
    Log link: transf is the exponential of the latent value.

    .. math::

        g(f) = \\log(\\mu)
    """

    def transf(self, f):
        """Return exp(f)."""
        exp_f = np.exp(f)
        return exp_f

    def dtransf_df(self, f):
        """Derivative of transf, which is again exp(f)."""
        exp_f = np.exp(f)
        return exp_f

    def d2transf_df2(self, f):
        """Second derivative of transf, which is again exp(f)."""
        exp_f = np.exp(f)
        return exp_f
class Log_ex_1(GPTransformation):
    """
    Link whose transf is log(1 + exp(f)).

    .. math::

        g(f) = \\log(\\exp(\\mu) - 1)

    All three methods are evaluated through np.logaddexp so that large |f|
    does not overflow exp(f) (which previously produced inf for transf and
    nan for the derivatives).
    """

    def transf(self, f):
        """Return log(1 + exp(f))."""
        # logaddexp(0, f) == log(exp(0) + exp(f)) without forming exp(f).
        return np.logaddexp(0., f)

    def dtransf_df(self, f):
        """Derivative of transf: exp(f)/(1 + exp(f))."""
        # exp(f)/(1+exp(f)) == 1/(1+exp(-f)) == exp(-log(1+exp(-f)))
        return np.exp(-np.logaddexp(0., -f))

    def d2transf_df2(self, f):
        """Second derivative of transf: s*(1 - s) with s = dtransf_df(f)."""
        aux = np.exp(-np.logaddexp(0., -f))
        return aux*(1.-aux)
class Reciprocal(GPTransformation):
    """
    Reciprocal transformation: transf(f) = 1/f.
    """

    def transf(self, f):
        """Return 1/f."""
        return 1./f

    def dtransf_df(self, f):
        """Derivative of transf: -1/f**2."""
        return -1./f**2

    def d2transf_df2(self, f):
        """Second derivative of transf: 2/f**3."""
        return 2./f**3
class Heaviside(GPTransformation):
    """
    Step transformation: 1 where f > 0, otherwise 0.

    .. math::

        g(f) = I_{x \\in A}
    """

    def transf(self, f):
        """Return 1 where f > 0 and 0 elsewhere."""
        return np.where(f>0, 1, 0)

    def dtransf_df(self, f):
        """Not available for a step function.

        :raises NotImplementedError: always
        """
        # Call form of raise: valid on both Python 2 and Python 3.
        raise NotImplementedError("This function is not differentiable!")

    def d2transf_df2(self, f):
        """Not available for a step function.

        :raises NotImplementedError: always
        """
        raise NotImplementedError("This function is not differentiable!")

View file

@ -0,0 +1,417 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
import pylab as pb
from GPy.util.plot import gpplot
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
class NoiseDistribution(object):
    """
    Base class for the noise models (likelihoods) used with Expectation propagation.

    The numerical routines below call self._mass, self._nlog_mass,
    self._dnlog_mass_dgp, self._d2nlog_mass_dgp2, self._mean, self._variance
    and their derivatives; none of these is defined in this class, so they
    have to be supplied by the concrete likelihood.

    :param gp_link: transformation from the latent space to the output space
    :type gp_link: gp_transformations.GPTransformation
    :param analytical_mean: use the analytical moment matching and predictive mean
    :param analytical_variance: use the analytical predictive variance

    .. note:: Y values allowed depend on the LikelihoodFunction used
    """

    def __init__(self, gp_link, analytical_mean=False, analytical_variance=False):
        assert isinstance(gp_link, gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."
        self.gp_link = gp_link
        self.analytical_mean = analytical_mean
        self.analytical_variance = analytical_variance
        # Bind the public entry points to either the analytical or the
        # numerical implementation once, at construction time.
        if self.analytical_mean:
            self.moments_match = self._moments_match_analytical
            self.predictive_mean = self._predictive_mean_analytical
        else:
            self.moments_match = self._moments_match_numerical
            self.predictive_mean = self._predictive_mean_numerical
        if self.analytical_variance:
            self.predictive_variance = self._predictive_variance_analytical
        else:
            self.predictive_variance = self._predictive_variance_numerical
        self.log_concave = True

    def _get_params(self):
        return np.zeros(0)

    def _get_param_names(self):
        return []

    def _set_params(self, p):
        pass

    def _gradients(self, partial):
        return np.zeros(0)

    def _preprocess_values(self, Y):
        """
        In case it is needed, this function assess the output values or makes any pertinent transformation on them.

        :param Y: observed output
        :type Y: Nx1 numpy.darray
        """
        return Y

    def _product(self, gp, obs, mu, sigma):
        """
        Product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return stats.norm.pdf(gp, loc=mu, scale=sigma) * self._mass(gp, obs)

    def _nlog_product_scaled(self, gp, obs, mu, sigma):
        """
        Negative log-product between the cavity distribution and a likelihood factor.

        .. note:: The constant term in the Gaussian distribution is ignored.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp, obs)

    def _dnlog_product_dgp(self, gp, obs, mu, sigma):
        """
        Derivative wrt latent variable of the negative log-product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp, obs)

    def _d2nlog_product_dgp2(self, gp, obs, mu, sigma):
        """
        Second derivative wrt latent variable of the negative log-product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 + self._d2nlog_mass_dgp2(gp, obs)

    def _product_mode(self, obs, mu, sigma):
        """
        Newton's CG method to find the mode in _product (cavity x likelihood factor).

        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return sp.optimize.fmin_ncg(self._nlog_product_scaled, x0=mu, fprime=self._dnlog_product_dgp, fhess=self._d2nlog_product_dgp2, args=(obs, mu, sigma), disp=False)

    def _moments_match_analytical(self, obs, tau, v):
        """
        If available, this function computes the moments analytically.
        """
        pass

    def _moments_match_numerical(self, obs, tau, v):
        """
        Laplace approximation to calculate the moments.

        :param obs: observed output
        :param tau: cavity distribution 1st natural parameter (precision)
        :param v: cavity distribution 2nd natural parameter (mu*precision)
        :returns: (Z_hat, mu_hat, sigma2_hat)
        """
        mu = v/tau
        mu_hat = self._product_mode(obs, mu, np.sqrt(1./tau))
        sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat, obs))
        Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat, obs)*np.sqrt(tau*sigma2_hat)
        return Z_hat, mu_hat, sigma2_hat

    def _nlog_conditional_mean_scaled(self, gp, mu, sigma):
        """
        Negative logarithm of the l.v.'s predictive distribution times the output's mean given the l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. note:: This function helps computing E(Y_star) = E(E(Y_star|f_star))
        """
        return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))

    def _dnlog_conditional_mean_dgp(self, gp, mu, sigma):
        """
        Derivative of _nlog_conditional_mean_scaled wrt. l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)

    def _d2nlog_conditional_mean_dgp2(self, gp, mu, sigma):
        """
        Second derivative of _nlog_conditional_mean_scaled wrt. l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2

    def _nlog_exp_conditional_variance_scaled(self, gp, mu, sigma):
        """
        Negative logarithm of the l.v.'s predictive distribution times the output's variance given the l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. note:: This function helps computing E(V(Y_star|f_star))
        """
        return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))

    def _dnlog_exp_conditional_variance_dgp(self, gp, mu, sigma):
        """
        Derivative of _nlog_exp_conditional_variance_scaled wrt. l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)

    def _d2nlog_exp_conditional_variance_dgp2(self, gp, mu, sigma):
        """
        Second derivative of _nlog_exp_conditional_variance_scaled wrt. l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2

    def _nlog_exp_conditional_mean_sq_scaled(self, gp, mu, sigma):
        """
        Negative logarithm of the l.v.'s predictive distribution times the output's mean squared given the l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. note:: This function helps computing E( E(Y_star|f_star)**2 )
        """
        return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))

    def _dnlog_exp_conditional_mean_sq_dgp(self, gp, mu, sigma):
        """
        Derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)

    def _d2nlog_exp_conditional_mean_sq_dgp2(self, gp, mu, sigma):
        """
        Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt. l.v.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )

    def _predictive_mean_analytical(self, mu, sigma):
        """
        If available, this function computes the predictive mean analytically.
        """
        pass

    def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
        """
        If available, this function computes the predictive variance analytically.

        :param predictive_mean: optional output's predictive mean; accepted so the
            signature matches _predictive_variance_numerical, because
            predictive_values always passes it as a third argument.
        """
        pass

    def _predictive_mean_numerical(self, mu, sigma):
        """
        Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled, x0=self._mean(mu), fprime=self._dnlog_conditional_mean_dgp, fhess=self._d2nlog_conditional_mean_dgp2, args=(mu, sigma), disp=False)
        # Laplace: integral of exp(-g)/(sqrt(2*pi)*sigma) ~ exp(-g(max))/(sqrt(g''(max))*sigma)
        mean = np.exp(-self._nlog_conditional_mean_scaled(maximum, mu, sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum, mu, sigma))*sigma)
        return mean

    def _predictive_mean_sq(self, mu, sigma):
        """
        Laplace approximation to the expected squared conditional mean: E( E(Y_star|f_star)**2 )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled, x0=self._mean(mu), fprime=self._dnlog_exp_conditional_mean_sq_dgp, fhess=self._d2nlog_exp_conditional_mean_sq_dgp2, args=(mu, sigma), disp=False)
        mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum, mu, sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum, mu, sigma))*sigma)
        return mean_squared

    def _predictive_variance_numerical(self, mu, sigma, predictive_mean=None):
        """
        Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        :predictive_mean: output's predictive mean, if None self.predictive_mean will be called.
        """
        # E( V(Y_star|f_star) )
        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled, x0=self._variance(mu), fprime=self._dnlog_exp_conditional_variance_dgp, fhess=self._d2nlog_exp_conditional_variance_dgp2, args=(mu, sigma), disp=False)
        exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum, mu, sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum, mu, sigma))*sigma)

        #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
        exp_exp2 = self._predictive_mean_sq(mu, sigma)
        if predictive_mean is None:
            predictive_mean = self.predictive_mean(mu, sigma)
        var_exp = exp_exp2 - predictive_mean**2
        return exp_var + var_exp

    def _predictive_percentiles(self, p, mu, sigma):
        """
        Percentiles of the predictive distribution: the latent normal quantile
        mapped through gp_link.transf.

        :parm p: lower tail probability
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        qf = stats.norm.ppf(p, mu, sigma)
        return self.gp_link.transf(qf)

    def _nlog_joint_predictive_scaled(self, x, mu, sigma):
        """
        Negative logarithm of the joint predictive distribution (latent variable and output).

        :param x: tuple (latent variable,output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        """
        return self._nlog_product_scaled(x[0], x[1], mu, sigma)

    def _gradient_nlog_joint_predictive(self, x, mu, sigma):
        """
        Gradient of _nlog_joint_predictive_scaled.

        Relies on self.discrete and self._dnlog_mass_dobs, neither of which is
        defined in this class.

        :param x: tuple (latent variable,output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation

        .. note: Only available when the output is continuous
        """
        assert not self.discrete, "Gradient not available for discrete outputs."
        return np.array((self._dnlog_product_dgp(gp=x[0], obs=x[1], mu=mu, sigma=sigma), self._dnlog_mass_dobs(obs=x[1], gp=x[0])))

    def _hessian_nlog_joint_predictive(self, x, mu, sigma):
        """
        Hessian of _nlog_joint_predictive_scaled.

        Relies on self.discrete, self._d2nlog_mass_dcross and
        self._d2nlog_mass_dobs2, none of which is defined in this class.

        :param x: tuple (latent variable,output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation

        .. note: Only available when the output is continuous
        """
        assert not self.discrete, "Hessian not available for discrete outputs."
        cross_derivative = self._d2nlog_mass_dcross(gp=x[0], obs=x[1])
        return np.array((self._d2nlog_product_dgp2(gp=x[0], obs=x[1], mu=mu, sigma=sigma), cross_derivative, cross_derivative, self._d2nlog_mass_dobs2(obs=x[1], gp=x[0]))).reshape(2, 2)

    def _joint_predictive_mode(self, mu, sigma):
        """
        Mode of the joint predictive distribution (latent variable and output),
        found by minimising _nlog_joint_predictive_scaled from (mu, transf(mu)).

        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        """
        return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled, x0=(mu, self.gp_link.transf(mu)), fprime=self._gradient_nlog_joint_predictive, fhess=self._hessian_nlog_joint_predictive, args=(mu, sigma), disp=False)

    def predictive_values(self, mu, var):
        """
        Compute mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction.

        :param mu: mean of the latent variable
        :param var: variance of the latent variable
        :returns: (pred_mean, pred_var, q1, q3), each stacked with np.vstack
        """
        if isinstance(mu, (float, int)):
            mu = [mu]
            var = [var]
        pred_mean = []
        pred_var = []
        q1 = []
        q3 = []
        for m, s in zip(mu, np.sqrt(var)):
            pred_mean.append(self.predictive_mean(m, s))
            # The freshly computed mean is handed over so the numerical
            # variance does not have to recompute it.
            pred_var.append(self.predictive_variance(m, s, pred_mean[-1]))
            q1.append(self._predictive_percentiles(.025, m, s))
            q3.append(self._predictive_percentiles(.975, m, s))
        pred_mean = np.vstack(pred_mean)
        pred_var = np.vstack(pred_var)
        q1 = np.vstack(q1)
        q3 = np.vstack(q3)
        return pred_mean, pred_var, q1, q3

View file

@ -0,0 +1,69 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution
class Poisson(NoiseDistribution):
    """
    Poisson likelihood with rate lambda = gp_link.transf(gp)

    .. math::
        L(x) = \\exp(-\\lambda) * \\frac{\\lambda^{Y_i}}{Y_i!}

    ..Note: Y is expected to take values in {0,1,2,...}
    """

    def __init__(self, gp_link=None, analytical_mean=False, analytical_variance=False):
        super(Poisson, self).__init__(gp_link, analytical_mean, analytical_variance)

    def _preprocess_values(self, Y): #TODO
        return Y

    def _mass(self, gp, obs):
        """
        Probability mass of obs given the latent value gp.
        """
        return stats.poisson.pmf(obs, self.gp_link.transf(gp))

    def _nlog_mass(self, gp, obs):
        """
        Negative logarithm of _mass: lambda - obs*log(lambda) + log(obs!).
        """
        # gammaln(obs+1) is log(obs!) computed directly; log(gamma(obs+1))
        # overflows to inf once obs exceeds roughly 170.
        return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + special.gammaln(obs+1)

    def _dnlog_mass_dgp(self, gp, obs):
        """First derivative of _nlog_mass with respect to gp."""
        return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp))

    def _d2nlog_mass_dgp2(self, gp, obs):
        """Second derivative of _nlog_mass with respect to gp."""
        d2_df = self.gp_link.d2transf_df2(gp)
        transf = self.gp_link.transf(gp)
        return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df

    def _mean(self, gp):
        """
        Conditional mean of the output given gp (the rate).
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self, gp):
        """First derivative of _mean with respect to gp."""
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self, gp):
        """Second derivative of _mean with respect to gp."""
        return self.gp_link.d2transf_df2(gp)

    def _variance(self, gp):
        """
        Conditional variance of the output given gp (equal to the rate).
        """
        return self.gp_link.transf(gp)

    def _dvariance_dgp(self, gp):
        """First derivative of _variance with respect to gp."""
        return self.gp_link.dtransf_df(gp)

    def _d2variance_dgp2(self, gp):
        """Second derivative of _variance with respect to gp."""
        return self.gp_link.d2transf_df2(gp)

View file

@ -0,0 +1,249 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
import gp_transformations
from noise_distributions import NoiseDistribution
from scipy import stats, integrate
from scipy.special import gammaln, gamma
class StudentT(NoiseDistribution):
"""
Student T likelihood
For nomanclature see Bayesian Data Analysis 2003 p576
$$\ln p(y_{i}|f_{i}) = \ln \Gamma(\frac{v+1}{2}) - \ln \Gamma(\frac{v}{2})\sqrt{v \pi}\sigma - \frac{v+1}{2}\ln (1 + \frac{1}{v}\left(\frac{y_{i} - f_{i}}{\sigma}\right)^2)$$
.. math::
Fill in maths
"""
def __init__(self, gp_link=None, analytical_mean=True, analytical_variance=True, deg_free=5, sigma2=2):
    """
    :param gp_link: passed through to the parent constructor
    :param analytical_mean: passed through to the parent constructor
    :param analytical_variance: passed through to the parent constructor
    :param deg_free: degrees of freedom, stored as self.v
    :param sigma2: scale parameter, stored (as a float) as self.sigma2
    """
    self.sigma2 = sigma2
    self.v = deg_free
    # _set_params re-stores sigma2 as a plain float.
    self._set_params(np.asarray(sigma2))
    super(StudentT, self).__init__(gp_link,
                                   analytical_mean=analytical_mean,
                                   analytical_variance=analytical_variance)
    # Set after the parent constructor, which initialises log_concave to True.
    self.log_concave = False
def _get_params(self):
    """Return the current sigma2 wrapped with np.asarray."""
    params = np.asarray(self.sigma2)
    return params
def _get_param_names(self):
    """Return the list of parameter names (a single entry)."""
    names = ["t_noise_std2"]
    return names
def _set_params(self, x):
    """Store x, converted to a float, as self.sigma2."""
    value = float(x)
    self.sigma2 = value
@property
def variance(self, extra_data=None):
    """Return sigma2 scaled by v/(v - 2)."""
    scale = self.v / float(self.v - 2)
    return scale * self.sigma2
def link_function(self, y, f, extra_data=None):
    """
    Student-t log-likelihood ln p(y|f), summed over all points.

    Per point, with e = y - f:

        gammaln((v+1)/2) - gammaln(v/2) - 0.5*ln(sigma2*v*pi)
            - 0.5*(v+1)*ln(1 + e**2/(v*sigma2))

    :y: data
    :f: latent variables f
    :extra_data: extra_data which is not used in student t distribution
    :returns: float(likelihood evaluated for this point)
    """
    assert y.shape == f.shape
    e = y - f
    # Builtin float: the np.float alias no longer exists in current NumPy.
    objective = (+ gammaln((self.v + 1) * 0.5)
                 - gammaln(self.v * 0.5)
                 - 0.5*np.log(self.sigma2 * self.v * np.pi)
                 - 0.5*(self.v + 1)*np.log(1 + (1/float(self.v))*((e**2)/self.sigma2))
                 )
    return np.sum(objective)
def dlik_df(self, y, f, extra_data=None):
    """
    Gradient of the log-likelihood w.r.t. f, evaluated point-wise:

        (v + 1)*(y - f) / (v*sigma2 + (y - f)**2)

    :y: data
    :f: latent variables f
    :extra_data: extra_data which is not used in student t distribution
    :returns: gradient of likelihood evaluated at points
    """
    assert y.shape == f.shape
    resid = y - f
    numerator = (self.v + 1) * resid
    denominator = self.v * self.sigma2 + (resid**2)
    return numerator / denominator
def d2lik_d2f(self, y, f, extra_data=None):
    """
    Diagonal of the Hessian of the log-likelihood w.r.t. f.

    The expression is evaluated element-wise, one value per (y_i, f_i) pair:

        (v + 1)*((y - f)**2 - v*sigma2) / (sigma2*v + (y - f)**2)**2

    :y: data
    :f: latent variables f
    :extra_data: extra_data which is not used in student t distribution
    :returns: array of second derivatives of the likelihood evaluated at points
    """
    assert y.shape == f.shape
    resid = y - f
    numerator = (self.v + 1)*(resid**2 - self.v*self.sigma2)
    denominator = (self.sigma2*self.v + resid**2)**2
    return numerator / denominator
def d3lik_d3f(self, y, f, extra_data=None):
    """
    Third derivative of the log-likelihood w.r.t. f, evaluated point-wise:

        2*(v + 1)*(y - f)*((y - f)**2 - 3*v*sigma2) / ((y - f)**2 + sigma2*v)**3
    """
    assert y.shape == f.shape
    resid = y - f
    numerator = -(2*(self.v + 1)*(-resid)*(resid**2 - 3*self.v*self.sigma2))
    denominator = (resid**2 + self.sigma2*self.v)**3
    return numerator / denominator
def dlik_dvar(self, y, f, extra_data=None):
    """
    Gradient of the log-likelihood w.r.t. the sigma2 parameter, summed over
    all points. Per point:

        v*((y - f)**2 - sigma2) / (2*sigma2*(sigma2*v + (y - f)**2))
    """
    assert y.shape == f.shape
    resid = y - f
    per_point = self.v*(resid**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + resid**2))
    total = np.sum(per_point) #May not want to sum over all dimensions if using many D?
    return total
def dlik_df_dvar(self, y, f, extra_data=None):
    """
    Derivative of dlik_df w.r.t. the sigma2 parameter, evaluated point-wise:

        -v*(v + 1)*(y - f) / (sigma2*v + (y - f)**2)**2
    """
    assert y.shape == f.shape
    resid = y - f
    numerator = self.v*(self.v+1)*(-resid)
    denominator = (self.sigma2*self.v + resid**2)**2
    return numerator/denominator
def d2lik_d2f_dvar(self, y, f, extra_data=None):
    r"""
    Derivative of the hessian diagonal (d2lik_d2f) with respect to the
    variance parameter sigma2.

    $$\frac{d}{d\sigma^{2}}(\frac{d^{2} \ln p(y_{i}|f_{i})}{d^{2}f}) = \frac{v(v + 1)(\sigma^{2}v - 3(y_{i}-f_{i})^{2})}{((y_{i}-f_{i})^{2} + \sigma^{2}v)^{3}}$$

    :y: data
    :f: latent variables f, must have the same shape as y
    :extra_data: accepted for interface compatibility, not used here
    :returns: array with the shape of y, one value per data point
    """
    assert y.shape == f.shape
    resid = y - f
    numerator = self.v*(self.v+1)*(self.sigma2*self.v - 3*(resid**2))
    denominator = (self.sigma2*self.v + (resid**2))**3
    return numerator / denominator
def _laplace_gradients(self, y, f, extra_data=None):
    """
    Collect the parameter gradients used by the Laplace approximation.

    :y: data
    :f: latent variables f
    :extra_data: forwarded unchanged to each derivative method
    :returns: tuple of three lists holding, in order, the results of
              dlik_dvar, dlik_df_dvar and d2lik_d2f_dvar; each list has one
              entry per name returned by _get_param_names
    """
    #must be listed in same order as 'get_param_names'
    # lists as we might learn many parameters
    grad_lik = [self.dlik_dvar(y, f, extra_data=extra_data)]
    grad_dlik_df = [self.dlik_df_dvar(y, f, extra_data=extra_data)]
    grad_d2lik_d2f = [self.d2lik_d2f_dvar(y, f, extra_data=extra_data)]
    derivs = (grad_lik, grad_dlik_df, grad_d2lik_d2f)
    # ensure we have gradients for every parameter we want to optimize
    for grads in derivs:
        assert len(grads) == len(self._get_param_names())
    return derivs
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
    """
    Variance of the prediction y* under int p(y*|f*)p(f*) df*, i.e. a
    student t likelihood integrated against a normal over the latent f*:
    (((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
    *((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))

    :mu: latent mean at the test points (not used in the computation)
    :sigma: latent spread at the test points; it is squared before use
    :predictive_mean: accepted for interface compatibility, not used here
    :returns: sigma**2 + self.variance
    """
    #We want the variance around test points y which comes from int p(y*|f*)p(f*) df*
    #Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)]
    #The first term is the spread of the latent f* that was passed in.
    #The second term is the variance of the student t distribution, which
    #does not depend on f, only on sigma and the degrees of freedom.
    # NOTE(review): the argument is squared here, i.e. treated as a standard
    # deviation, although the original comments called it a variance --
    # confirm against the caller.
    return sigma**2 + self.variance
def _predictive_mean_analytical(self, mu, var):
    """
    Mean of the prediction.

    :mu: latent mean at the test points
    :var: accepted for interface compatibility, not used here
    :returns: mu, passed through unchanged
    """
    return mu
def sample_predicted_values(self, mu, var):
    """
    Experimental sampling / numerical integration of the predictive
    distribution. Not implemented.

    This method raises unconditionally. An earlier draft placed after the
    raise was unreachable and has been removed. It sketched two approaches:
    Monte Carlo sampling of f* followed by student t sampling, and
    numerical integration of the student t times gaussian product.

    :mu: latent mean at the test points (unused)
    :var: latent variance at the test points (unused)
    :raises NotImplementedError: always
    """
    raise NotImplementedError

View file

@ -10,11 +10,13 @@ class MLP(Mapping):
.. math:: .. math::
f(\mathbf{x}*) = \mathbf{W}^0\boldsymbol{\phi}(\mathbf{W}^1\mathbf{x}+\mathb{b}^1)^* + \mathbf{b}^0 f(\\mathbf{x}*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}+\\mathbf{b}^1)^* + \\mathbf{b}^0
where where
..math::
\phi(\cdot) = \text{tanh}(\cdot) .. math::
\\phi(\\cdot) = \\text{tanh}(\\cdot)
:param X: input observations :param X: input observations
:type X: ndarray :type X: ndarray
@ -22,6 +24,7 @@ class MLP(Mapping):
:type output_dim: int :type output_dim: int
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as manny hidden layers as the length of the list, each with the given number of hidden nodes in it. :param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as manny hidden layers as the length of the list, each with the given number of hidden nodes in it.
:type hidden_dim: int or list of ints. :type hidden_dim: int or list of ints.
""" """
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3): def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):

View file

@ -14,3 +14,5 @@ from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM from bayesian_gplvm import BayesianGPLVM
from mrd import MRD from mrd import MRD
from gradient_checker import GradientChecker from gradient_checker import GradientChecker
from gp_multioutput_regression import GPMultioutputRegression
from sparse_gp_multioutput_regression import SparseGPMultioutputRegression

View file

@ -8,7 +8,7 @@ from .. import kern
import itertools import itertools
from matplotlib.colors import colorConverter from matplotlib.colors import colorConverter
from GPy.inference.optimization import SCG from GPy.inference.optimization import SCG
from GPy.util import plot_latent from GPy.util import plot_latent, linalg
from GPy.models.gplvm import GPLVM from GPy.models.gplvm import GPLVM
from GPy.util.plot_latent import most_significant_input_dimensions from GPy.util.plot_latent import most_significant_input_dimensions
from matplotlib import pyplot from matplotlib import pyplot
@ -66,8 +66,8 @@ class BayesianGPLVM(SparseGP, GPLVM):
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
return (X_names + S_names + SparseGP._get_param_names(self)) return (X_names + S_names + SparseGP._get_param_names(self))
def _get_print_names(self): #def _get_print_names(self):
return SparseGP._get_print_names(self) # return SparseGP._get_print_names(self)
def _get_params(self): def _get_params(self):
""" """
@ -140,12 +140,20 @@ class BayesianGPLVM(SparseGP, GPLVM):
dpsi0 = -0.5 * self.input_dim * self.likelihood.precision dpsi0 = -0.5 * self.input_dim * self.likelihood.precision
dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
V = self.likelihood.precision * Y V = self.likelihood.precision * Y
#compute CPsi1V
if self.Cpsi1V is None:
psi1V = np.dot(self.psi1.T, self.likelihood.V)
tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
dpsi1 = np.dot(self.Cpsi1V, V.T) dpsi1 = np.dot(self.Cpsi1V, V.T)
start = np.zeros(self.input_dim * 2) start = np.zeros(self.input_dim * 2)
for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]): for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
args = (self.kern, self.Z, dpsi0, dpsi1_n, dpsi2) args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2)
xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False) xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
mu, log_S = xopt.reshape(2, 1, -1) mu, log_S = xopt.reshape(2, 1, -1)
@ -237,12 +245,13 @@ class BayesianGPLVM(SparseGP, GPLVM):
""" """
Plot latent space X in 1D: Plot latent space X in 1D:
-if fig is given, create input_dim subplots in fig and plot in these - if fig is given, create input_dim subplots in fig and plot in these
-if ax is given plot input_dim 1D latent space plots of X into each `axis` - if ax is given plot input_dim 1D latent space plots of X into each `axis`
-if neither fig nor ax is given create a figure with fignum and plot in there - if neither fig nor ax is given create a figure with fignum and plot in there
colors: colors:
colors of different latent space dimensions input_dim colors of different latent space dimensions input_dim
""" """
import pylab import pylab
if ax is None: if ax is None:

View file

@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
GP._set_params(self, x[self.mapping.num_params:]) GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
dL_df = 2.*self.kern.dK_dX(self.dL_dK, self.X) dL_df = self.kern.dK_dX(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y) dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self))) return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -31,8 +31,8 @@ class FITCClassification(FITC):
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
if likelihood is None: if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial() noise_model = likelihoods.binomial()
likelihood = likelihoods.EP(Y, distribution) likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None: elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()): if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.' raise Warning, 'likelihood.data and Y are different.'

View file

@ -14,7 +14,7 @@ class GPClassification(GP):
This is a thin wrapper around the models.GP class, with a set of sensible defaults This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X: input observations :param X: input observations
:param Y: observed values :param Y: observed values, can be None if likelihood is not None
:param likelihood: a GPy likelihood, defaults to Binomial with probit link_function :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf :param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
@ -31,8 +31,8 @@ class GPClassification(GP):
kernel = kern.rbf(X.shape[1]) kernel = kern.rbf(X.shape[1])
if likelihood is None: if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial() noise_model = likelihoods.binomial()
likelihood = likelihoods.EP(Y, distribution) likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None: elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()): if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.' raise Warning, 'likelihood.data and Y are different.'

View file

@ -0,0 +1,58 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
class GPMultioutputRegression(GP):
    """
    Multiple output Gaussian process with Gaussian noise

    This is a wrapper around the models.GP class, with a set of sensible defaults

    :param X_list: input observations
    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
    :param Y_list: observed values
    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
    :param kernel_list: GPy kernels, defaults to rbf
    :type kernel_list: list of GPy kernels
    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
    :type noise_variance_list: list of floats
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the observed values (passed to the likelihood as `normalize`)
    :type normalize_Y: False|True
    :param rank: number of tuples of the coregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
    :type rank: integer
    """

    def __init__(self, X_list, Y_list, kernel_list=None, noise_variance_list=None, normalize_X=False, normalize_Y=False, rank=1):
        self.output_dim = len(Y_list)
        assert len(X_list) == self.output_dim, 'Number of outputs do not match length of inputs list.'

        #Inputs indexing
        # One index entry per data ROW: x.shape[0], not x.size. Using x.size
        # produced num_data*input_dim entries, so the hstack below failed
        # whenever input_dim > 1.
        index = []
        for i, (x, y) in enumerate(zip(X_list, Y_list)):
            assert x.shape[0] == y.shape[0]
            index.append(np.repeat(i, x.shape[0])[:, None])
        index = np.vstack(index)
        # The output index is appended as the last column of the stacked inputs
        X = np.hstack([np.vstack(X_list), index])
        original_dim = X.shape[1] - 1

        #Mixed noise likelihood definition
        likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list, noise_params=noise_variance_list, normalize=normalize_Y)

        #Coregionalization kernel definition
        if kernel_list is None:
            kernel_list = [kern.rbf(original_dim)]
        mkernel = kern.build_lcm(input_dim=original_dim, output_dim=self.output_dim, kernel_list=kernel_list, rank=rank)

        self.multioutput = True
        GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X)
        self.ensure_default_constraints()

View file

@ -61,7 +61,7 @@ class GPLVM(GP):
GP._set_params(self, x[self.X.size:]) GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
dL_dX = 2.*self.kern.dK_dX(self.dL_dK, self.X) dL_dX = self.kern.dK_dX(self.dL_dK, self.X)
return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self))) return np.hstack((dL_dX.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -39,6 +39,7 @@ class MRD(Model):
:param num_inducing: number of inducing inputs to use :param num_inducing: number of inducing inputs to use
:param kernels: list of kernels or kernel shared for all BGPLVMS :param kernels: list of kernels or kernel shared for all BGPLVMS
:type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default) :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default)
""" """
def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None, def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None,
kernels=None, initx='PCA', kernels=None, initx='PCA',
@ -163,28 +164,31 @@ class MRD(Model):
self._init_X(initx, self.likelihood_list) self._init_X(initx, self.likelihood_list)
self._init_Z(initz, self.X) self._init_Z(initz, self.X)
def _get_latent_param_names(self): #def _get_latent_param_names(self):
def _get_param_names(self):
n1 = self.gref._get_param_names() n1 = self.gref._get_param_names()
n1var = n1[:self.NQ * 2 + self.MQ] n1var = n1[:self.NQ * 2 + self.MQ]
return n1var # return n1var
#
#def _get_kernel_names(self):
def _get_kernel_names(self):
map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x), map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x),
itertools.izip(ns, itertools.izip(ns,
itertools.repeat(name))) itertools.repeat(name)))
kernel_names = (map_names(SparseGP._get_param_names(g)[self.MQ:], n) for g, n in zip(self.bgplvms, self.names)) return list(itertools.chain(n1var, *(map_names(\
return kernel_names SparseGP._get_param_names(g)[self.MQ:], n) \
for g, n in zip(self.bgplvms, self.names))))
# kernel_names = (map_names(SparseGP._get_param_names(g)[self.MQ:], n) for g, n in zip(self.bgplvms, self.names))
# return kernel_names
def _get_param_names(self): #def _get_param_names(self):
# X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
# S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
n1var = self._get_latent_param_names() # n1var = self._get_latent_param_names()
kernel_names = self._get_kernel_names() # kernel_names = self._get_kernel_names()
return list(itertools.chain(n1var, *kernel_names)) # return list(itertools.chain(n1var, *kernel_names))
def _get_print_names(self): #def _get_print_names(self):
return list(itertools.chain(*self._get_kernel_names())) # return list(itertools.chain(*self._get_kernel_names()))
def _get_params(self): def _get_params(self):
""" """
@ -335,8 +339,11 @@ class MRD(Model):
def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs): def plot_scales(self, fignum=None, ax=None, titles=None, sharex=False, sharey=True, *args, **kwargs):
""" """
:param:`titles` :
titles for axes of datasets TODO: Explain other parameters
:param titles: titles for axes of datasets
""" """
if titles is None: if titles is None:
titles = [r'${}$'.format(name) for name in self.names] titles = [r'${}$'.format(name) for name in self.names]

View file

@ -28,11 +28,11 @@ class SparseGPClassification(SparseGP):
def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10): def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
if kernel is None: if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3) kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)
if likelihood is None: if likelihood is None:
distribution = likelihoods.likelihood_functions.Binomial() noise_model = likelihoods.binomial()
likelihood = likelihoods.EP(Y, distribution) likelihood = likelihoods.EP(Y, noise_model)
elif Y is not None: elif Y is not None:
if not all(Y.flatten() == likelihood.data.flatten()): if not all(Y.flatten() == likelihood.data.flatten()):
raise Warning, 'likelihood.data and Y are different.' raise Warning, 'likelihood.data and Y are different.'

View file

@ -0,0 +1,80 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from ..util import multioutput
class SparseGPMultioutputRegression(SparseGP):
    """
    Sparse multiple output Gaussian process with Gaussian noise

    This is a wrapper around the models.SparseGP class, with a set of sensible defaults

    :param X_list: input observations
    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
    :param Y_list: observed values
    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
    :param kernel_list: GPy kernels, defaults to rbf
    :type kernel_list: list of GPy kernels
    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
    :type noise_variance_list: list of floats
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the observed values (passed to the likelihood as `normalize`)
    :type normalize_Y: False|True
    :param Z_list: inducing inputs (optional); when None or empty they are drawn at random from the rows of each X
    :type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | empty list | None
    :param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is not empty)
    :type num_inducing: integer, or one integer per output
    :param rank: number of tuples of the coregionalization parameters 'coregion_W' (see coregionalize kernel documentation)
    :type rank: integer
    """
    #NOTE not tested with uncertain inputs

    def __init__(self, X_list, Y_list, kernel_list=None, noise_variance_list=None, normalize_X=False, normalize_Y=False, Z_list=None, num_inducing=10, rank=1):
        self.output_dim = len(Y_list)
        assert len(X_list) == self.output_dim, 'Number of outputs do not match length of inputs list.'

        # Work on a private copy. The old mutable default ([]) was appended
        # to below, so inducing inputs leaked from one instantiation into
        # the next, and a caller-supplied empty list was modified in place.
        Z_list = [] if Z_list is None else list(Z_list)

        #Inducing inputs list
        if len(Z_list):
            assert len(Z_list) == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
        else:
            # np.int was only an alias of the builtin int (removed in recent
            # NumPy); also accept NumPy integer scalars.
            if isinstance(num_inducing, (int, np.integer)):
                num_inducing = [num_inducing] * self.output_dim
            num_inducing = np.asarray(num_inducing)
            assert num_inducing.size == self.output_dim, 'Number of outputs do not match length of inducing inputs list.'
            for ni, x in zip(num_inducing, X_list):
                # random subset of the rows of x as initial inducing inputs
                chosen = np.random.permutation(x.shape[0])[:ni]
                Z_list.append(x[chosen].copy())

        #Inputs and inducing inputs indexing
        # One index entry per ROW (shape[0]); .size counted every element and
        # broke the hstack below for input_dim > 1.
        index = []
        index_z = []
        for i, (x, y, z) in enumerate(zip(X_list, Y_list, Z_list)):
            assert x.shape[0] == y.shape[0]
            index.append(np.repeat(i, x.shape[0])[:, None])
            index_z.append(np.repeat(i, z.shape[0])[:, None])
        index = np.vstack(index)
        index_z = np.vstack(index_z)
        # The output index is appended as the last column of X and of Z
        X = np.hstack([np.vstack(X_list), index])
        Z = np.hstack([np.vstack(Z_list), index_z])
        original_dim = X.shape[1] - 1

        #Mixed noise likelihood definition
        likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list, noise_params=noise_variance_list, normalize=normalize_Y)

        #Coregionalization kernel definition
        if kernel_list is None:
            kernel_list = [kern.rbf(original_dim)]
        mkernel = kern.build_lcm(input_dim=original_dim, output_dim=self.output_dim, kernel_list=kernel_list, rank=rank)

        self.multioutput = True
        SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X)
        # Fix the output-index column of the inducing inputs. For
        # original_dim == 1 this is exactly the previous pattern
        # '.*iip_\d+_1'.
        # NOTE(review): presumably inducing inputs are named
        # 'iip_<row>_<column>', so the index column is number original_dim --
        # confirm against SparseGP._get_param_names.
        self.constrain_fixed(r'.*iip_\d+_%d' % original_dim)
        self.ensure_default_constraints()

View file

@ -20,7 +20,11 @@ class SparseGPRegression(SparseGP):
:type normalize_X: False|True :type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True :type normalize_Y: False|True
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:rtype: model object :rtype: model object
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None
.. Note:: Multiple independent outputs are allowed using columns of Y .. Note:: Multiple independent outputs are allowed using columns of Y

View file

@ -38,7 +38,7 @@ In sparse GPs wouldn't it be clearer to call Z inducing?
In coregionalisation matrix, setting the W to all ones will (surely?) ensure that symmetry isn't broken. Also, but allowing it to scale like that, the output variance increases as rank is increased (and if user sets rank to more than output dim they could get very different results). In coregionalisation matrix, setting the W to all ones will (surely?) ensure that symmetry isn't broken. Also, but allowing it to scale like that, the output variance increases as rank is increased (and if user sets rank to more than output dim they could get very different results).
We are inconsistent about our use of ise and ize e.g. optimize and normalize_X, but coregionalise, we should choose one and stick to it. Suggest -ize. We are inconsistent about our use of ise and ize e.g. optimize and normalize_X, but coregionalise, we should choose one and stick to it. Suggest -ize. Neil- I'm imposing the US spellings to keep things consistent, so -ize it is.
Exceptions: we need to provide a list of exceptions we throw and specify what is thrown where. Exceptions: we need to provide a list of exceptions we throw and specify what is thrown where.

View file

@ -0,0 +1,50 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class BCGPLVMTests(unittest.TestCase):
    """Gradient checks for BCGPLVM with kernel, linear and MLP back-constraint mappings."""

    def _check_backconstraint(self, make_mapping):
        """
        Sample data from an rbf + white GP, build a BCGPLVM with the mapping
        returned by make_mapping(Y, input_dim, output_dim), and assert that
        the model gradients pass checkgrad.
        """
        num_data, input_dim, output_dim = 10, 2, 4
        X = np.random.rand(num_data, input_dim)
        k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
        K = k.K(X)
        Y = np.random.multivariate_normal(np.zeros(num_data), K, output_dim).T
        # the model itself uses a different kernel from the one that generated the data
        k = GPy.kern.mlp(input_dim) + GPy.kern.bias(input_dim)
        mapping = make_mapping(Y, input_dim, output_dim)
        m = GPy.models.BCGPLVM(Y, input_dim, kernel=k, mapping=mapping)
        m.randomize()
        self.assertTrue(m.checkgrad())

    def test_kernel_backconstraint(self):
        def make_mapping(Y, input_dim, output_dim):
            bk = GPy.kern.rbf(output_dim)
            return GPy.mappings.Kernel(output_dim=input_dim, X=Y, kernel=bk)
        self._check_backconstraint(make_mapping)

    def test_linear_backconstraint(self):
        def make_mapping(Y, input_dim, output_dim):
            return GPy.mappings.Linear(output_dim=input_dim, input_dim=output_dim)
        self._check_backconstraint(make_mapping)

    def test_mlp_backconstraint(self):
        def make_mapping(Y, input_dim, output_dim):
            return GPy.mappings.MLP(output_dim=input_dim, input_dim=output_dim, hidden_dim=[5, 4, 7])
        self._check_backconstraint(make_mapping)
if __name__ == "__main__":
    # A single parenthesised argument prints the same text with the
    # Python 2 print statement used throughout this file.
    print("Running unit tests, please be (very) patient...")
    unittest.main()

View file

@ -55,7 +55,18 @@ class BGPLVMTests(unittest.TestCase):
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
#@unittest.skip('psi2 cross terms are NotImplemented for this combination') def test_rbf_line_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_linear_bias_kern(self): def test_linear_bias_kern(self):
N, num_inducing, input_dim, D = 30, 5, 4, 30 N, num_inducing, input_dim, D = 30, 5, 4, 30
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)

View file

@ -5,39 +5,71 @@ import unittest
import numpy as np import numpy as np
import GPy import GPy
verbose = False
class KernelTests(unittest.TestCase): class KernelTests(unittest.TestCase):
def test_kerneltie(self): def test_kerneltie(self):
K = GPy.kern.rbf(5, ARD=True) K = GPy.kern.rbf(5, ARD=True)
K.tie_params('.*[01]') K.tie_params('.*[01]')
K.constrain_fixed('2') K.constrain_fixed('2')
X = np.random.rand(5,5) X = np.random.rand(5,5)
Y = np.ones((5,1)) Y = np.ones((5,1))
m = GPy.models.GPRegression(X,Y,K) m = GPy.models.GPRegression(X,Y,K)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_rbfkernel(self): def test_rbfkernel(self):
verbose = False
kern = GPy.kern.rbf(5) kern = GPy.kern.rbf(5)
self.assertTrue(GPy.kern.Kern_check_model(kern).is_positive_definite()) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
self.assertTrue(GPy.kern.Kern_check_dK_dtheta(kern).checkgrad(verbose=verbose))
self.assertTrue(GPy.kern.Kern_check_dKdiag_dtheta(kern).checkgrad(verbose=verbose)) def test_rbf_sympykernel(self):
self.assertTrue(GPy.kern.Kern_check_dK_dX(kern).checkgrad(verbose=verbose)) kern = GPy.kern.rbf_sympy(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_rbf_invkernel(self):
kern = GPy.kern.rbf_inv(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_Matern32kernel(self):
kern = GPy.kern.Matern32(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_Matern52kernel(self):
kern = GPy.kern.Matern52(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_linearkernel(self):
kern = GPy.kern.linear(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_exponentialkernel(self):
kern = GPy.kern.periodic_exponential(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_Matern32kernel(self):
kern = GPy.kern.periodic_Matern32(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_periodic_Matern52kernel(self):
kern = GPy.kern.periodic_Matern52(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_rational_quadratickernel(self):
kern = GPy.kern.rational_quadratic(1)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_gibbskernel(self): def test_gibbskernel(self):
verbose = False
kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1)) kern = GPy.kern.gibbs(5, mapping=GPy.mappings.Linear(5, 1))
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_heterokernel(self):
kern = GPy.kern.hetero(5, mapping=GPy.mappings.Linear(5, 1), transform=GPy.core.transformations.logexp())
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_mlpkernel(self): def test_mlpkernel(self):
verbose = False
kern = GPy.kern.mlp(5) kern = GPy.kern.mlp(5)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
def test_polykernel(self): def test_polykernel(self):
verbose = False
kern = GPy.kern.poly(5, degree=4) kern = GPy.kern.poly(5, degree=4)
self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose)) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
@ -48,25 +80,22 @@ class KernelTests(unittest.TestCase):
X = np.random.rand(30, 4) X = np.random.rand(30, 4)
K = np.dot(X, X.T) K = np.dot(X, X.T)
kernel = GPy.kern.fixed(4, K) kernel = GPy.kern.fixed(4, K)
Y = np.ones((30,1)) kern = GPy.kern.poly(5, degree=4)
m = GPy.models.GPRegression(X,Y,kernel=kernel) self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
self.assertTrue(m.checkgrad())
def test_coregionalisation(self): # def test_coregionalization(self):
X1 = np.random.rand(50,1)*8 # X1 = np.random.rand(50,1)*8
X2 = np.random.rand(30,1)*5 # X2 = np.random.rand(30,1)*5
index = np.vstack((np.zeros_like(X1),np.ones_like(X2))) # index = np.vstack((np.zeros_like(X1),np.ones_like(X2)))
X = np.hstack((np.vstack((X1,X2)),index)) # X = np.hstack((np.vstack((X1,X2)),index))
Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05 # Y1 = np.sin(X1) + np.random.randn(*X1.shape)*0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2. # Y2 = np.sin(X2) + np.random.randn(*X2.shape)*0.05 + 2.
Y = np.vstack((Y1,Y2)) # Y = np.vstack((Y1,Y2))
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalise(2,1)
k = k1.prod(k2,tensor=True)
m = GPy.models.GPRegression(X,Y,kernel=k)
self.assertTrue(m.checkgrad())
# k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
# k2 = GPy.kern.coregionalize(2,1)
# kern = k1**k2
# self.assertTrue(GPy.kern.kern_test(kern, verbose=verbose))
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -72,14 +72,17 @@ class LaplaceTests(unittest.TestCase):
noise = np.random.randn(*self.X.shape)*self.real_std noise = np.random.randn(*self.X.shape)*self.real_std
self.Y = np.sin(self.X*2*np.pi) + noise self.Y = np.sin(self.X*2*np.pi) + noise
#self.Y = np.array([[1.0]])#np.sin(self.X*2*np.pi) + noise #self.Y = np.array([[1.0]])#np.sin(self.X*2*np.pi) + noise
self.var = 0.3 self.var = 0.2
self.f = np.random.rand(self.N, self.D) self.f = np.random.rand(self.N, self.D)
#self.f = np.array([[3.0]])#np.sin(self.X*2*np.pi) + noise #self.f = np.array([[3.0]])#np.sin(self.X*2*np.pi) + noise
self.var = np.random.rand(1) self.var = np.random.rand(1)
self.stu_t = GPy.likelihoods.functions.StudentT(deg_free=5, sigma2=self.var) self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var)
self.gauss = GPy.likelihoods.functions.Gaussian(self.var, self.D, self.N) self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)
#Make a bigger step as lower bound can be quite curved
self.step = 1e-4
def tearDown(self): def tearDown(self):
self.stu_t = None self.stu_t = None
@ -118,7 +121,7 @@ class LaplaceTests(unittest.TestCase):
noise = np.random.randn(*self.X.shape)*self.real_std noise = np.random.randn(*self.X.shape)*self.real_std
self.Y = np.sin(self.X*2*np.pi) + noise self.Y = np.sin(self.X*2*np.pi) + noise
self.f = np.random.rand(self.N, 1) self.f = np.random.rand(self.N, 1)
self.gauss = GPy.likelihoods.functions.Gaussian(self.var, self.D, self.N) self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)
dlik_df = functools.partial(self.gauss.dlik_df, self.Y) dlik_df = functools.partial(self.gauss.dlik_df, self.Y)
d2lik_d2f = functools.partial(self.gauss.d2lik_d2f, self.Y) d2lik_d2f = functools.partial(self.gauss.d2lik_d2f, self.Y)
@ -167,6 +170,7 @@ class LaplaceTests(unittest.TestCase):
grad = GradientChecker(link, dlik_df, self.f.copy(), 'f') grad = GradientChecker(link, dlik_df, self.f.copy(), 'f')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_studentt_d2lik_d2f(self): def test_studentt_d2lik_d2f(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
@ -175,6 +179,7 @@ class LaplaceTests(unittest.TestCase):
grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f') grad = GradientChecker(dlik_df, d2lik_d2f, self.f.copy(), 'f')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_studentt_d3lik_d3f(self): def test_studentt_d3lik_d3f(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
@ -183,6 +188,7 @@ class LaplaceTests(unittest.TestCase):
grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f') grad = GradientChecker(d2lik_d2f, d3lik_d3f, self.f.copy(), 'f')
grad.randomize() grad.randomize()
grad.checkgrad(verbose=1) grad.checkgrad(verbose=1)
self.assertTrue(grad.checkgrad())
def test_studentt_dlik_dvar(self): def test_studentt_dlik_dvar(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
@ -216,27 +222,27 @@ class LaplaceTests(unittest.TestCase):
m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=gauss_laplace) m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=gauss_laplace)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
m.checkgrad(verbose=1) m.checkgrad(verbose=1, step=self.step)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad(step=self.step))
def test_studentt_approx_gauss_rbf(self): def test_studentt_approx_gauss_rbf(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.Y = self.Y/self.Y.max() self.Y = self.Y/self.Y.max()
self.stu_t = GPy.likelihoods.functions.StudentT(deg_free=1000, sigma2=self.var) self.stu_t = GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var)
kernel = GPy.kern.rbf(self.X.shape[1]) + GPy.kern.white(self.X.shape[1]) kernel = GPy.kern.rbf(self.X.shape[1]) + GPy.kern.white(self.X.shape[1])
stu_t_laplace = GPy.likelihoods.Laplace(self.Y.copy(), self.stu_t, opt='rasm') stu_t_laplace = GPy.likelihoods.Laplace(self.Y.copy(), self.stu_t, opt='rasm')
m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=stu_t_laplace) m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=stu_t_laplace)
m.ensure_default_constraints() m.ensure_default_constraints()
m.constrain_positive('t_noise') m.constrain_positive('t_noise')
m.randomize() m.randomize()
m.checkgrad(verbose=1) m.checkgrad(verbose=1, step=self.step)
print m print m
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad(step=self.step))
def test_studentt_rbf(self): def test_studentt_rbf(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.Y = self.Y/self.Y.max() self.Y = self.Y/self.Y.max()
white_var = 3.0 white_var = 1
kernel = GPy.kern.rbf(self.X.shape[1]) + GPy.kern.white(self.X.shape[1]) kernel = GPy.kern.rbf(self.X.shape[1]) + GPy.kern.white(self.X.shape[1])
stu_t_laplace = GPy.likelihoods.Laplace(self.Y.copy(), self.stu_t, opt='rasm') stu_t_laplace = GPy.likelihoods.Laplace(self.Y.copy(), self.stu_t, opt='rasm')
m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=stu_t_laplace) m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=stu_t_laplace)
@ -244,14 +250,14 @@ class LaplaceTests(unittest.TestCase):
m.constrain_positive('t_noise') m.constrain_positive('t_noise')
m.constrain_fixed('white', white_var) m.constrain_fixed('white', white_var)
m.randomize() m.randomize()
m.checkgrad(verbose=1) m.checkgrad(verbose=1, step=self.step)
print m print m
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad(step=self.step))
def test_studentt_rbf_smallvar(self): def test_studentt_rbf_smallvar(self):
print "\n{}".format(inspect.stack()[0][3]) print "\n{}".format(inspect.stack()[0][3])
self.Y = self.Y/self.Y.max() self.Y = self.Y/self.Y.max()
white_var = 3.0 white_var = 1
kernel = GPy.kern.rbf(self.X.shape[1]) + GPy.kern.white(self.X.shape[1]) kernel = GPy.kern.rbf(self.X.shape[1]) + GPy.kern.white(self.X.shape[1])
stu_t_laplace = GPy.likelihoods.Laplace(self.Y.copy(), self.stu_t, opt='rasm') stu_t_laplace = GPy.likelihoods.Laplace(self.Y.copy(), self.stu_t, opt='rasm')
m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=stu_t_laplace) m = GPy.models.GPRegression(self.X, self.Y.copy(), kernel, likelihood=stu_t_laplace)
@ -259,9 +265,9 @@ class LaplaceTests(unittest.TestCase):
m.constrain_positive('t_noise') m.constrain_positive('t_noise')
m.constrain_fixed('white', white_var) m.constrain_fixed('white', white_var)
m['t_noise'] = 0.01 m['t_noise'] = 0.01
m.checkgrad(verbose=1) m.checkgrad(verbose=1, step=self.step)
print m print m
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad(step=self.step))
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests" print "Running unit tests"

View file

@ -5,7 +5,6 @@
import unittest import unittest
import numpy as np import numpy as np
import GPy import GPy
from GPy.likelihoods.likelihood_functions import Binomial
class GradientTests(unittest.TestCase): class GradientTests(unittest.TestCase):
def setUp(self): def setUp(self):
@ -199,10 +198,7 @@ class GradientTests(unittest.TestCase):
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.Binomial() m = GPy.models.GPClassification(X,Y,kernel=kernel)
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.core.GP(X, likelihood, kernel)
m.ensure_default_constraints()
m.update_likelihood_approximation() m.update_likelihood_approximation()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -212,10 +208,11 @@ class GradientTests(unittest.TestCase):
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
Z = np.linspace(0, 15, 4)[:, None] Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.Binomial() m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
likelihood = GPy.likelihoods.EP(Y, distribution) #distribution = GPy.likelihoods.likelihood_functions.Binomial()
m = GPy.core.SparseGP(X, likelihood, kernel, Z) #likelihood = GPy.likelihoods.EP(Y, distribution)
m.ensure_default_constraints() #m = GPy.core.SparseGP(X, likelihood, kernel, Z)
#m.ensure_default_constraints()
m.update_likelihood_approximation() m.update_likelihood_approximation()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@ -224,10 +221,36 @@ class GradientTests(unittest.TestCase):
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None] X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.rbf(1) + GPy.kern.white(1) k = GPy.kern.rbf(1) + GPy.kern.white(1)
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
m = GPy.models.FITCClassification(X, Y=Y) m = GPy.models.FITCClassification(X, Y, kernel = k)
m.update_likelihood_approximation() m.update_likelihood_approximation()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def multioutput_regression_1D(self):
    """
    Gradient check for a two-output GPMultioutputRegression model.

    Builds two noisy 1D sine data sets of different sizes (50 and 30
    points), fits a multi-output GP with a single RBF kernel, fixes the
    parameters matching '.*rbf_var' to 1 and asserts that checkgrad() passes.

    NOTE(review): the method name lacks the ``test_`` prefix, so the default
    unittest loader will presumably not collect it -- confirm this is intended.
    """
    X1 = np.random.rand(50, 1) * 8
    X2 = np.random.rand(30, 1) * 5
    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
    Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05

    k1 = GPy.kern.rbf(1)
    m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
    m.constrain_fixed('.*rbf_var', 1.)
    self.assertTrue(m.checkgrad())
def multioutput_sparse_regression_1D(self):
    """
    Gradient check for a two-output SparseGPMultioutputRegression model.

    Builds two noisy 1D sine data sets of different sizes (500 and 300
    points), fits a sparse multi-output GP with a single RBF kernel, fixes
    the parameters matching '.*rbf_var' to 1 and asserts that checkgrad()
    passes.

    NOTE(review): the method name lacks the ``test_`` prefix, so the default
    unittest loader will presumably not collect it -- confirm this is intended.
    """
    X1 = np.random.rand(500, 1) * 8
    X2 = np.random.rand(300, 1) * 5
    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
    Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05

    k1 = GPy.kern.rbf(1)
    m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
    m.constrain_fixed('.*rbf_var', 1.)
    self.assertTrue(m.checkgrad())
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."
unittest.main() unittest.main()

View file

@ -8,8 +8,20 @@ import zipfile
import tarfile import tarfile
import datetime import datetime
# Switch for the IPython-notebook input path; hard-coded to False here, so
# the guarded import and helper below are skipped unless this is edited.
ipython_notebook = False
if ipython_notebook:
    import IPython.core.display
    def ipynb_input(varname, prompt=''):
        """Prompt user for input and assign string val to given variable name.

        Builds a JavaScript snippet that opens a browser prompt and then asks
        the notebook kernel to execute ``<varname> = '<entered value>'``.

        :param varname: name of the Python variable the snippet assigns to
        :param prompt: text shown in the browser prompt dialog
        :returns: an IPython.core.display.Javascript object wrapping the snippet

        NOTE(review): the entered value is spliced unescaped between single
        quotes into the Python statement; a value containing a quote would
        break or alter that statement -- confirm this is acceptable.
        """
        js_code = ("""
var value = prompt("{prompt}","");
var py_code = "{varname} = '" + value + "'";
IPython.notebook.kernel.execute(py_code);
""").format(prompt=prompt, varname=varname)
        return IPython.core.display.Javascript(js_code)
import sys, urllib import sys, urllib
def reporthook(a,b,c): def reporthook(a,b,c):
# ',' at the end of the line is important! # ',' at the end of the line is important!
#print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c), #print "% 3.1f%% of %d bytes\r" % (min(100, float(a * b) / c * 100), c),
@ -131,12 +143,16 @@ The database was created with funding from NSF EIA-0196217.""",
'size' : 24229368}, 'size' : 24229368},
} }
def prompt_user(): def prompt_user():
"""Ask user for agreeing to data set licenses.""" """Ask user for agreeing to data set licenses."""
# raw_input returns the empty string for "enter" # raw_input returns the empty string for "enter"
yes = set(['yes', 'y']) yes = set(['yes', 'y'])
no = set(['no','n']) no = set(['no','n'])
choice = ''
if ipython_notebook:
ipynb_input(choice, prompt='provide your answer here')
else:
choice = raw_input().lower() choice = raw_input().lower()
if choice in yes: if choice in yes:
return True return True
@ -146,6 +162,7 @@ def prompt_user():
sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'") sys.stdout.write("Please respond with 'yes', 'y' or 'no', 'n'")
return prompt_user() return prompt_user()
def data_available(dataset_name=None): def data_available(dataset_name=None):
"""Check if the data set is available on the local machine already.""" """Check if the data set is available on the local machine already."""
for file_list in data_resources[dataset_name]['files']: for file_list in data_resources[dataset_name]['files']:
@ -524,11 +541,14 @@ def simulation_BGPLVM():
'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"} 'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"}
def toy_rbf_1d(seed=default_seed, num_samples=500): def toy_rbf_1d(seed=default_seed, num_samples=500):
"""Samples values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1. """
Samples values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1.
:param seed: seed to use for random sampling. :param seed: seed to use for random sampling.
:type seed: int :type seed: int
:param num_samples: number of samples to sample in the function (default 500). :param num_samples: number of samples to sample in the function (default 500).
:type num_samples: int :type num_samples: int
""" """
np.random.seed(seed=seed) np.random.seed(seed=seed)
num_in = 1 num_in = 1
@ -631,11 +651,15 @@ def olympic_marathon_men(data_set='olympic_marathon_men'):
def crescent_data(num_data=200, seed=default_seed): def crescent_data(num_data=200, seed=default_seed):
"""Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem. """
Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
:param num_data_part: number of data to be sampled (default is 200). :param num_data_part: number of data to be sampled (default is 200).
:type num_data: int :type num_data: int
:param seed: random seed to be used for data generation. :param seed: random seed to be used for data generation.
:type seed: int""" :type seed: int
"""
np.random.seed(seed=seed) np.random.seed(seed=seed)
sqrt2 = np.sqrt(2) sqrt2 = np.sqrt(2)
# Rotation matrix # Rotation matrix

63
GPy/util/erfcx.py Normal file
View file

@ -0,0 +1,63 @@
## Copyright (C) 2010 Soren Hauberg
##
## Copyright James Hensman 2011
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## This program is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; see the file COPYING. If not, see
## <http://www.gnu.org/licenses/>.
import numpy as np
def erfcx(arg):
    """
    Scaled complementary error function, evaluated element-wise with a
    polynomial approximation.

    :param arg: real scalar or array of evaluation points
    :type arg: float or np.ndarray
    :returns: array with the shape of np.atleast_1d(arg); entries below the
        lower threshold (-26.628) are set to inf, entries above the upper
        threshold (2.53e+307) are left at 0
    :rtype: np.ndarray
    :raises ValueError: if any element of arg has a non-zero imaginary part
    """
    arg = np.atleast_1d(arg)
    # The previous `assert(cond, msg)` asserted a two-element tuple, which is
    # always truthy, so the check could never fail. Validate explicitly.
    if not np.all(np.isreal(arg)):
        raise ValueError("erfcx: input must be real")

    ## Get precision dependent thresholds -- or not :p
    xneg = -26.628
    xmax = 2.53e+307

    ## Allocate output
    result = np.zeros(arg.shape)

    ## Find values where erfcx can be evaluated
    idx_neg = (arg < xneg)
    idx_max = (arg > xmax)
    idx = ~(idx_neg | idx_max)
    arg = arg[idx]

    ## Perform the actual computation (on |arg|; negatives are fixed up below)
    t = 3.97886080735226 / (np.abs(arg) + 3.97886080735226)
    u = t - 0.5
    y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u \
        - 0.003963850973605135) * u - 8.70779635317295828e-4) * u + \
        0.00773672528313526668) * u + 0.00383335126264887303) * u - \
        0.0127223813782122755) * u - 0.0133823644533460069) * u + \
        0.0161315329733252248) * u + 0.0390976845588484035) * u + \
        0.00249367200053503304
    y = ((((((((((((y * u - 0.0838864557023001992) * u - \
        0.119463959964325415) * u + 0.0166207924969367356) * u + \
        0.357524274449531043) * u + 0.805276408752910567) * u + \
        1.18902982909273333) * u + 1.37040217682338167) * u + \
        1.31314653831023098) * u + 1.07925515155856677) * u + \
        0.774368199119538609) * u + 0.490165080585318424) * u + \
        0.275374741597376782) * t

    # Reflection for negative arguments: erfcx(-x) = 2*exp(x**2) - erfcx(x)
    negative = arg < 0
    y[negative] = 2 * np.exp(arg[negative]**2) - y[negative]

    ## Put the results back into something with the same size as the original input
    result[idx] = y
    result[idx_neg] = np.inf
    ## result (idx_max) = 0; # not needed as we initialise with zeros
    return result

View file

@ -27,31 +27,37 @@ except:
_blas_available = False _blas_available = False
def dtrtrs(A, B, lower=0, trans=0, unitdiag=0): def dtrtrs(A, B, lower=0, trans=0, unitdiag=0):
"""Wrapper for lapack dtrtrs function """
Wrapper for lapack dtrtrs function
:param A: Matrix A :param A: Matrix A
:param B: Matrix B :param B: Matrix B
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: :returns:
""" """
return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag) return lapack.dtrtrs(A, B, lower=lower, trans=trans, unitdiag=unitdiag)
def dpotrs(A, B, lower=0): def dpotrs(A, B, lower=0):
"""Wrapper for lapack dpotrs function """
Wrapper for lapack dpotrs function
:param A: Matrix A :param A: Matrix A
:param B: Matrix B :param B: Matrix B
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: :returns:
""" """
return lapack.dpotrs(A, B, lower=lower) return lapack.dpotrs(A, B, lower=lower)
def dpotri(A, lower=0): def dpotri(A, lower=0):
"""Wrapper for lapack dpotri function """
Wrapper for lapack dpotri function
:param A: Matrix A :param A: Matrix A
:param lower: is matrix lower (true) or upper (false) :param lower: is matrix lower (true) or upper (false)
:returns: :returns: A inverse
""" """
return lapack.dpotri(A, lower=lower) return lapack.dpotri(A, lower=lower)
@ -65,18 +71,20 @@ def pddet(A):
def trace_dot(a, b): def trace_dot(a, b):
""" """
efficiently compute the trace of the matrix product of a and b Efficiently compute the trace of the matrix product of a and b
""" """
return np.sum(a * b) return np.sum(a * b)
def mdot(*args): def mdot(*args):
"""Multiply all the arguments using matrix product rules. """
Multiply all the arguments using matrix product rules.
The output is equivalent to multiplying the arguments one by one The output is equivalent to multiplying the arguments one by one
from left to right using dot(). from left to right using dot().
Precedence can be controlled by creating tuples of arguments, Precedence can be controlled by creating tuples of arguments,
for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)). for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)).
Note that this means the output of dot(a,b) and mdot(a,b) will differ if Note that this means the output of dot(a,b) and mdot(a,b) will differ if
a or b is a pure tuple of numbers. a or b is a pure tuple of numbers.
""" """
if len(args) == 1: if len(args) == 1:
return args[0] return args[0]
@ -123,14 +131,16 @@ def jitchol(A, maxtries=5):
def jitchol_old(A, maxtries=5): def jitchol_old(A, maxtries=5):
""" """
:param A : An almost pd square matrix :param A: An almost pd square matrix
:rval L: the Cholesky decomposition of A :rval L: the Cholesky decomposition of A
.. Note: .. note:
Adds jitter to K, to enforce positive-definiteness Adds jitter to K, to enforce positive-definiteness
if stuff breaks, please check: if stuff breaks, please check:
np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T) np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T)
""" """
try: try:
return linalg.cholesky(A, lower=True) return linalg.cholesky(A, lower=True)
@ -150,6 +160,7 @@ def jitchol_old(A, maxtries=5):
def pdinv(A, *args): def pdinv(A, *args):
""" """
:param A: A DxD pd numpy array :param A: A DxD pd numpy array
:rval Ai: the inverse of A :rval Ai: the inverse of A
@ -160,6 +171,7 @@ def pdinv(A, *args):
:rtype Li: np.ndarray :rtype Li: np.ndarray
:rval logdet: the log of the determinant of A :rval logdet: the log of the determinant of A
:rtype logdet: float64 :rtype logdet: float64
""" """
L = jitchol(A, *args) L = jitchol(A, *args)
logdet = 2.*np.sum(np.log(np.diag(L))) logdet = 2.*np.sum(np.log(np.diag(L)))
@ -185,14 +197,13 @@ def chol_inv(L):
def multiple_pdinv(A): def multiple_pdinv(A):
""" """
Arguments
---------
:param A: A DxDxN numpy array (each A[:,:,i] is pd) :param A: A DxDxN numpy array (each A[:,:,i] is pd)
Returns :rval invs: the inverses of A
------- :rtype invs: np.ndarray
invs : the inverses of A :rval hld: 0.5* the log of the determinants of A
hld: 0.5* the log of the determinants of A :rtype hld: np.array
""" """
N = A.shape[-1] N = A.shape[-1]
chols = [jitchol(A[:, :, i]) for i in range(N)] chols = [jitchol(A[:, :, i]) for i in range(N)]
@ -206,15 +217,13 @@ def PCA(Y, input_dim):
""" """
Principal component analysis: maximum likelihood solution by SVD Principal component analysis: maximum likelihood solution by SVD
Arguments
---------
:param Y: NxD np.array of data :param Y: NxD np.array of data
:param input_dim: int, dimension of projection :param input_dim: int, dimension of projection
Returns
-------
:rval X: - Nxinput_dim np.array of dimensionality reduced data :rval X: - Nxinput_dim np.array of dimensionality reduced data
W - input_dimxD mapping from X to Y :rval W: - input_dimxD mapping from X to Y
""" """
if not np.allclose(Y.mean(axis=0), 0.0): if not np.allclose(Y.mean(axis=0), 0.0):
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
@ -281,11 +290,10 @@ def DSYR_blas(A, x, alpha=1.):
Performs a symmetric rank-1 update operation: Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T) A <- A + alpha * np.dot(x,x.T)
Arguments
---------
:param A: Symmetric NxN np.array :param A: Symmetric NxN np.array
:param x: Nx1 np.array :param x: Nx1 np.array
:param alpha: scalar :param alpha: scalar
""" """
N = c_int(A.shape[0]) N = c_int(A.shape[0])
LDA = c_int(A.shape[0]) LDA = c_int(A.shape[0])
@ -303,11 +311,10 @@ def DSYR_numpy(A, x, alpha=1.):
Performs a symmetric rank-1 update operation: Performs a symmetric rank-1 update operation:
A <- A + alpha * np.dot(x,x.T) A <- A + alpha * np.dot(x,x.T)
Arguments
---------
:param A: Symmetric NxN np.array :param A: Symmetric NxN np.array
:param x: Nx1 np.array :param x: Nx1 np.array
:param alpha: scalar :param alpha: scalar
""" """
A += alpha * np.dot(x[:, None], x[None, :]) A += alpha * np.dot(x[:, None], x[None, :])
@ -371,8 +378,9 @@ def cholupdate(L, x):
""" """
update the LOWER cholesky factor of a pd matrix IN PLACE update the LOWER cholesky factor of a pd matrix IN PLACE
if L is the lower chol. of K, then this function computes L_ if L is the lower chol. of K, then this function computes L\_
where L_ is the lower chol of K + x*x^T where L\_ is the lower chol of K + x*x^T
""" """
support_code = """ support_code = """
#include <math.h> #include <math.h>

110
GPy/util/ln_diff_erfs.py Normal file
View file

@ -0,0 +1,110 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#Only works for scipy 0.12+
try:
from scipy.special import erfcx, erf
except ImportError:
from scipy.special import erf
from erfcx import erfcx
import numpy as np
def ln_diff_erfs(x1, x2, return_sign=False):
    """
    Compute the log of the difference of two erfs, log(erf(x1) - erf(x2)),
    in a numerically stable manner.

    :param x1: argument of the positive erf
    :type x1: ndarray
    :param x2: argument of the negative erf
    :type x2: ndarray
    :param return_sign: if True return the log of the absolute difference
        together with the sign; if False fold the sign into the result as an
        imaginary part of pi wherever the difference is negative
    :type return_sign: bool
    :return: if return_sign is True, the tuple
        (log(abs(erf(x1) - erf(x2))), sign(x1 - x2)); otherwise the log as an
        array (complex where a negative difference has to be represented)
    :raises ValueError: if the shapes differ and neither argument is a scalar

    The inputs are never modified. Only the real parts of x1 and x2 are used.

    Based on MATLAB code that was written by Antti Honkela and modified by David Luengo and originally derived from code by Neil Lawrence.
    """
    # Private float copies: the caller's arrays must not be changed by the
    # swap below, and integer inputs must not truncate the result.
    x1 = np.array(np.require(x1).real, dtype=float)
    x2 = np.array(np.require(x2).real, dtype=float)
    if x1.size == 1:
        x1 = np.reshape(x1, (1, 1))
    if x2.size == 1:
        x2 = np.reshape(x2, (1, 1))

    if x1.shape != x2.shape:
        # Expand a scalar argument to exactly the shape of the other one.
        if x1.size == 1:
            x1 = np.zeros(x2.shape) + x1.flat[0]
        elif x2.size == 1:
            x2 = np.zeros(x1.shape) + x2.flat[0]
        else:
            raise ValueError("This function does not broadcast unless provided with a scalar.")

    # Order each pair so that upper >= lower; the sign records the swaps.
    sign = np.sign(x1 - x2)
    swapped = sign == -1
    upper = np.where(swapped, x2, x1)
    lower = np.where(swapped, x1, x2)
    v = np.zeros(upper.shape)

    # Log of zero is expected for equal arguments; silence only that warning.
    with np.errstate(divide='ignore'):
        # Case 0: arguments of different sign, no problems with loss of accuracy
        opposite = np.logical_and(upper > 0, lower < 0)
        # Case 1: x1 = x2 so we have log of zero.
        equal = (upper == lower)
        settled = np.logical_or(opposite, equal)
        # Case 2: Both arguments are non-negative
        both_nonneg = np.logical_and(upper > 0, np.logical_not(settled))
        # Case 3: Both arguments are non-positive
        both_nonpos = np.logical_and(np.logical_not(settled),
                                     np.logical_not(both_nonneg))

        if np.any(opposite):
            v[opposite] = np.log(erf(upper[opposite]) - erf(lower[opposite]))
        if np.any(equal):
            v[equal] = -np.inf
        if np.any(both_nonneg):
            hi, lo = upper[both_nonneg], lower[both_nonneg]
            v[both_nonneg] = np.log(erfcx(lo)
                                    - erfcx(hi)*np.exp(lo**2 - hi**2)) - lo**2
        if np.any(both_nonpos):
            hi, lo = upper[both_nonpos], lower[both_nonpos]
            v[both_nonpos] = np.log(erfcx(-hi)
                                    - erfcx(-lo)*np.exp(hi**2 - lo**2)) - hi**2

    if return_sign:
        return v, sign

    # Need to add in a complex part because the argument of the log is
    # negative. Convert with astype: view() would reinterpret the raw bytes.
    if v.size == 1:
        if sign == -1:
            v = v.astype(complex) + np.pi*1j
    else:
        v = v.astype(complex)
        v[swapped] += np.pi*1j
    return v

View file

@ -17,12 +17,9 @@ def linear_grid(D, n = 100, min_max = (-100, 100)):
""" """
Creates a D-dimensional grid of n linearly spaced points Creates a D-dimensional grid of n linearly spaced points
Parameters: :param D: dimension of the grid
:param n: number of points
D: dimension of the grid :param min_max: (min, max) list
n: number of points
min_max: (min, max) list
""" """
@ -39,6 +36,7 @@ def kmm_init(X, m = 10):
:param X: data :param X: data
:param m: number of inducing points :param m: number of inducing points
""" """
# compute the distances # compute the distances

View file

@ -92,13 +92,15 @@ class tree:
def swap_vertices(self, i, j): def swap_vertices(self, i, j):
"""Swap two vertices in the tree structure array. """
Swap two vertices in the tree structure array.
swap_vertex swaps the location of two vertices in a tree structure array. swap_vertex swaps the location of two vertices in a tree structure array.
ARG tree : the tree for which two vertices are to be swapped.
ARG i : the index of the first vertex to be swapped. :param tree: the tree for which two vertices are to be swapped.
ARG j : the index of the second vertex to be swapped. :param i: the index of the first vertex to be swapped.
RETURN tree : the tree structure with the two vertex locations :param j: the index of the second vertex to be swapped.
swapped. :rval tree: the tree structure with the two vertex locations swapped.
""" """
store_vertex_i = self.vertices[i] store_vertex_i = self.vertices[i]
store_vertex_j = self.vertices[j] store_vertex_j = self.vertices[j]
@ -117,12 +119,17 @@ class tree:
def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False): def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False):
"""Compute the rotation matrix for an angle in each direction. """
Compute the rotation matrix for an angle in each direction.
This is a helper function for computing the rotation matrix for a given set of angles in a given order. This is a helper function for computing the rotation matrix for a given set of angles in a given order.
ARG xangle : rotation for x-axis.
ARG yangle : rotation for y-axis. :param xangle: rotation for x-axis.
ARG zangle : rotation for z-axis. :param yangle: rotation for y-axis.
ARG order : the order for the rotations.""" :param zangle: rotation for z-axis.
:param order: the order for the rotations.
"""
if degrees: if degrees:
xangle = math.radians(xangle) xangle = math.radians(xangle)
yangle = math.radians(yangle) yangle = math.radians(yangle)
@ -301,10 +308,12 @@ class acclaim_skeleton(skeleton):
def load_skel(self, file_name): def load_skel(self, file_name):
"""Loads an ASF file into a skeleton structure. """
loads skeleton structure from an acclaim skeleton file. Loads an ASF file into a skeleton structure.
ARG file_name : the file name to load in.
RETURN skel : the skeleton for the file.""" :param file_name: The file name to load in.
"""
fid = open(file_name, 'r') fid = open(file_name, 'r')
self.read_skel(fid) self.read_skel(fid)

35
GPy/util/multioutput.py Normal file
View file

@ -0,0 +1,35 @@
import numpy as np
import warnings
from .. import kern
def build_lcm(input_dim, num_outputs, CK=None, NC=None, W_columns=1, W=None, kappa=None):
    """
    Builds a kernel for a linear coregionalization model

    :param input_dim: Input dimensionality
    :param num_outputs: Number of outputs
    :param CK: List of coregionalized kernels (i.e., this will be multiplied by a coregionalize kernel). Must contain at least one kernel.
    :param NC: List of kernels that will be added up together with CK, but won't be multiplied by a coregionalize kernel
    :param W_columns: number tuples of the corregionalization parameters 'coregion_W'
    :type W_columns: integer
    :param W: passed through to every kern.coregionalize call
    :param kappa: passed through to every kern.coregionalize call
    :returns: the sum of each CK kernel tensor-multiplied by its own coregionalize kernel, plus every kernel in NC
    :raises ValueError: if CK is empty

    Kernels whose input_dim does not match (input_dim for CK, input_dim + 1
    for NC) have it overwritten in place and a warning is issued.
    """
    #TODO build_icm or build_lcm
    CK = [] if CK is None else CK
    NC = [] if NC is None else NC
    if not CK:
        # Without this guard the first product below fails with an opaque
        # IndexError on CK[0].
        raise ValueError("build_lcm needs at least one coregionalized kernel in CK.")

    for k in CK:
        if k.input_dim != input_dim:
            k.input_dim = input_dim
            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

    for k in NC:
        if k.input_dim != input_dim + 1:
            k.input_dim = input_dim + 1
            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

    kernel = CK[0].prod(kern.coregionalize(num_outputs,W_columns,W,kappa),tensor=True)
    for k in CK[1:]:
        k_coreg = kern.coregionalize(num_outputs,W_columns,W,kappa)
        kernel += k.prod(k_coreg,tensor=True)
    for k in NC:
        kernel += k

    return kernel

View file

@ -15,7 +15,7 @@ def most_significant_input_dimensions(model, which_indices):
try: try:
input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2] input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2]
except: except:
raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
else: else:
input_1, input_2 = which_indices input_1, input_2 = which_indices
return input_1, input_2 return input_1, input_2

32
GPy/util/symbolic.py Normal file
View file

@ -0,0 +1,32 @@
from sympy import Function, S, oo, I, cos, sin
class sinc_grad(Function):
    """
    Symbolic first derivative of sin(x)/x, i.e. (x*cos(x) - sin(x))/x**2,
    with the value 0 returned for an argument of exactly zero.
    """

    nargs = 1

    def fdiff(self, argindex=1):
        # Derivative of (x*cos(x) - sin(x))/x**2 with respect to its argument.
        # The argument has to be read from self.args: the bare name `x` used
        # previously was undefined in this scope.
        x = self.args[0]
        return ((2-x*x)*sin(x) - 2*x*cos(x))/(x*x*x)

    @classmethod
    def eval(cls, x):
        if x is S.Zero:
            return S.Zero
        else:
            return (x*cos(x) - sin(x))/(x*x)
class sinc(Function):
    """
    Symbolic sin(x)/x, with the value 1 returned for an argument of exactly
    zero. Its derivative is expressed through sinc_grad.
    """

    nargs = 1

    @classmethod
    def eval(cls, x):
        # Exact zero is special-cased; anything else is expanded directly.
        return S.One if x is S.Zero else sin(x)/x

    def fdiff(self, argindex=1):
        argument = self.args[0]
        return sinc_grad(argument)

    def _eval_is_real(self):
        # Real exactly when the argument is reported as real.
        argument = self.args[0]
        return argument.is_real

View file

@ -32,4 +32,15 @@ def std_norm_cdf(x):
x = float(x) x = float(x)
return weave.inline(code,arg_names=['x'],support_code=support_code) return weave.inline(code,arg_names=['x'],support_code=support_code)
def inv_std_norm_cdf(x):
    """
    Approximate inverse of the standard Gaussian cumulative distribution.

    The inverse error function is approximated in closed form
    (Winitzki, S., 2008) and rescaled by sqrt(2).

    :param x: probability (or array of probabilities) to invert
    :returns: approximate quantile(s) of the standard normal distribution
    """
    # Map the probability onto the argument of the inverse error function.
    erf_arg = 2*x - 1
    log_term = np.log(1 - erf_arg**2)
    # Constant of the Winitzki approximation.
    a = 8*(np.pi - 3)/(3*np.pi*(4 - np.pi))
    b = 2/(np.pi*a) + log_term/2
    magnitude = np.sqrt(np.sqrt(b**2 - log_term/a) - b)
    inv_erf = np.sign(erf_arg) * magnitude
    return np.sqrt(2) * inv_erf

View file

@ -502,11 +502,14 @@ def data_play(Y, visualizer, frame_rate=30):
This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object. This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the mocap_show visualize object.
.. code-block:: python
data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01']) data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01'])
Y = data['Y'] Y = data['Y']
Y[:, 0:3] = 0. # Make figure walk in place Y[:, 0:3] = 0. # Make figure walk in place
visualize = GPy.util.visualize.skeleton_show(Y[0, :], data['skel']) visualize = GPy.util.visualize.skeleton_show(Y[0, :], data['skel'])
GPy.util.visualize.data_play(Y, visualize) GPy.util.visualize.data_play(Y, visualize)
""" """

View file

@ -53,9 +53,11 @@ class TanhWarpingFunction(WarpingFunction):
self.num_parameters = 3 * self.n_terms self.num_parameters = 3 * self.n_terms
def f(self,y,psi): def f(self,y,psi):
"""transform y with f using parameter vector psi """
transform y with f using parameter vector psi
psi = [[a,b,c]] psi = [[a,b,c]]
f = \sum_{terms} a * tanh(b*(y+c)) :math:`f = \\sum_{terms} a * tanh(b*(y+c))`
""" """
#1. check that number of params is consistent #1. check that number of params is consistent
@ -77,8 +79,7 @@ class TanhWarpingFunction(WarpingFunction):
""" """
calculate the numerical inverse of f calculate the numerical inverse of f
== input == :param iterations: number of N.R. iterations
iterations: number of N.R. iterations
""" """
@ -165,9 +166,11 @@ class TanhWarpingFunction_d(WarpingFunction):
self.num_parameters = 3 * self.n_terms + 1 self.num_parameters = 3 * self.n_terms + 1
def f(self,y,psi): def f(self,y,psi):
"""transform y with f using parameter vector psi """
Transform y with f using parameter vector psi
psi = [[a,b,c]] psi = [[a,b,c]]
f = \sum_{terms} a * tanh(b*(y+c))
:math:`f = \\sum_{terms} a * tanh(b*(y+c))`
""" """
#1. check that number of params is consistent #1. check that number of params is consistent
@ -189,8 +192,7 @@ class TanhWarpingFunction_d(WarpingFunction):
""" """
calculate the numerical inverse of f calculate the numerical inverse of f
== input == :param max_iterations: maximum number of N.R. iterations
iterations: number of N.R. iterations
""" """
@ -214,12 +216,13 @@ class TanhWarpingFunction_d(WarpingFunction):
def fgrad_y(self, y, psi, return_precalc = False): def fgrad_y(self, y, psi, return_precalc = False):
""" """
gradient of f w.r.t to y ([N x 1]) gradient of f w.r.t to y ([N x 1])
returns: Nx1 vector of derivatives, unless return_precalc is true,
then it also returns the precomputed stuff :returns: Nx1 vector of derivatives, unless return_precalc is true, then it also returns the precomputed stuff
""" """
mpsi = psi.copy() mpsi = psi.coSpy()
d = psi[-1] d = psi[-1]
mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3) mpsi = mpsi[:self.num_parameters-1].reshape(self.n_terms, 3)
@ -242,7 +245,7 @@ class TanhWarpingFunction_d(WarpingFunction):
""" """
gradient of f w.r.t to y and psi gradient of f w.r.t to y and psi
returns: NxIx4 tensor of partial derivatives :returns: NxIx4 tensor of partial derivatives
""" """

View file

@ -1,10 +1,57 @@
GPy GPy
=== ===
A Gaussian processes framework in python A Gaussian processes framework in Python.
* [Online documentation](https://gpy.readthedocs.org/en/latest/) * [Online documentation](https://gpy.readthedocs.org/en/latest/)
* [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy) * [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy)
Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png) Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GPy.png)
Compiling documentation:
========================
The documentation is stored in doc/ and is compiled with the Sphinx Python documentation generator, and is written in the reStructuredText format.
The Sphinx documentation is available here: http://sphinx-doc.org/latest/contents.html
Installing dependencies:
------------------------
To compile the documentation, first ensure that Sphinx is installed. On Debian-based systems, this can be achieved as follows:
sudo apt-get install python-pip
sudo pip install sphinx
A LaTeX distribution is also required to compile the equations. Note that the extra packages are necessary to install the unicode packages. To compile the equations to PNG format for use in HTML pages, the package *dvipng* must be installed. IPython is also required. On Debian-based systems, this can be achieved as follows:
sudo apt-get install texlive texlive-latex-extra texlive-base texlive-recommended
sudo apt-get install dvipng
sudo apt-get install ipython
Compiling documentation:
------------------------
The documentation can be compiled as follows:
cd doc
make html
The HTML files are then stored in doc/_build/
Running unit tests:
===================
Ensure nose is installed via pip:
pip install nose
Run nosetests from the root directory of the repository:
nosetests -v

View file

@ -1,91 +1,102 @@
core Package GPy.core package
============ ================
:mod:`core` Package Submodules
------------------- ----------
.. automodule:: GPy.core GPy.core.domains module
:members: -----------------------
:undoc-members:
:show-inheritance:
:mod:`domains` Module
---------------------
.. automodule:: GPy.core.domains .. automodule:: GPy.core.domains
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`fitc` Module GPy.core.fitc module
------------------ --------------------
.. automodule:: GPy.core.fitc .. automodule:: GPy.core.fitc
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`gp` Module GPy.core.gp module
---------------- ------------------
.. automodule:: GPy.core.gp .. automodule:: GPy.core.gp
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`gp_base` Module GPy.core.gp_base module
--------------------- -----------------------
.. automodule:: GPy.core.gp_base .. automodule:: GPy.core.gp_base
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`model` Module GPy.core.mapping module
------------------- -----------------------
.. automodule:: GPy.core.mapping
:members:
:undoc-members:
:show-inheritance:
GPy.core.model module
---------------------
.. automodule:: GPy.core.model .. automodule:: GPy.core.model
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`parameterized` Module GPy.core.parameterized module
--------------------------- -----------------------------
.. automodule:: GPy.core.parameterized .. automodule:: GPy.core.parameterized
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`priors` Module GPy.core.priors module
-------------------- ----------------------
.. automodule:: GPy.core.priors .. automodule:: GPy.core.priors
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`sparse_gp` Module GPy.core.sparse_gp module
----------------------- -------------------------
.. automodule:: GPy.core.sparse_gp .. automodule:: GPy.core.sparse_gp
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`svigp` Module GPy.core.svigp module
------------------- ---------------------
.. automodule:: GPy.core.svigp .. automodule:: GPy.core.svigp
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`transformations` Module GPy.core.transformations module
----------------------------- -------------------------------
.. automodule:: GPy.core.transformations .. automodule:: GPy.core.transformations
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
Module contents
---------------
.. automodule:: GPy.core
:members:
:undoc-members:
:show-inheritance:

View file

@ -1,51 +1,54 @@
examples Package GPy.examples package
================ ====================
:mod:`examples` Package Submodules
----------------------- ----------
.. automodule:: GPy.examples GPy.examples.classification module
:members: ----------------------------------
:undoc-members:
:show-inheritance:
:mod:`classification` Module
----------------------------
.. automodule:: GPy.examples.classification .. automodule:: GPy.examples.classification
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`dimensionality_reduction` Module GPy.examples.dimensionality_reduction module
-------------------------------------- --------------------------------------------
.. automodule:: GPy.examples.dimensionality_reduction .. automodule:: GPy.examples.dimensionality_reduction
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`regression` Module GPy.examples.regression module
------------------------ ------------------------------
.. automodule:: GPy.examples.regression .. automodule:: GPy.examples.regression
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`stochastic` Module GPy.examples.stochastic module
------------------------ ------------------------------
.. automodule:: GPy.examples.stochastic .. automodule:: GPy.examples.stochastic
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`tutorials` Module GPy.examples.tutorials module
----------------------- -----------------------------
.. automodule:: GPy.examples.tutorials .. automodule:: GPy.examples.tutorials
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
Module contents
---------------
.. automodule:: GPy.examples
:members:
:undoc-members:
:show-inheritance:

View file

@ -1,51 +1,62 @@
inference Package GPy.inference package
================= =====================
:mod:`conjugate_gradient_descent` Module Submodules
---------------------------------------- ----------
GPy.inference.conjugate_gradient_descent module
-----------------------------------------------
.. automodule:: GPy.inference.conjugate_gradient_descent .. automodule:: GPy.inference.conjugate_gradient_descent
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`gradient_descent_update_rules` Module GPy.inference.gradient_descent_update_rules module
------------------------------------------- --------------------------------------------------
.. automodule:: GPy.inference.gradient_descent_update_rules .. automodule:: GPy.inference.gradient_descent_update_rules
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`optimization` Module GPy.inference.optimization module
-------------------------- ---------------------------------
.. automodule:: GPy.inference.optimization .. automodule:: GPy.inference.optimization
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`samplers` Module GPy.inference.samplers module
---------------------- -----------------------------
.. automodule:: GPy.inference.samplers .. automodule:: GPy.inference.samplers
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`scg` Module GPy.inference.scg module
----------------- ------------------------
.. automodule:: GPy.inference.scg .. automodule:: GPy.inference.scg
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`sgd` Module GPy.inference.sgd module
----------------- ------------------------
.. automodule:: GPy.inference.sgd .. automodule:: GPy.inference.sgd
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
Module contents
---------------
.. automodule:: GPy.inference
:members:
:undoc-members:
:show-inheritance:

246
doc/GPy.kern.parts.rst Normal file
View file

@ -0,0 +1,246 @@
GPy.kern.parts package
======================
Submodules
----------
GPy.kern.parts.Brownian module
------------------------------
.. automodule:: GPy.kern.parts.Brownian
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.Matern32 module
------------------------------
.. automodule:: GPy.kern.parts.Matern32
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.Matern52 module
------------------------------
.. automodule:: GPy.kern.parts.Matern52
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.bias module
--------------------------
.. automodule:: GPy.kern.parts.bias
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.coregionalize module
-----------------------------------
.. automodule:: GPy.kern.parts.coregionalize
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.exponential module
---------------------------------
.. automodule:: GPy.kern.parts.exponential
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.finite_dimensional module
----------------------------------------
.. automodule:: GPy.kern.parts.finite_dimensional
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.fixed module
---------------------------
.. automodule:: GPy.kern.parts.fixed
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.gibbs module
---------------------------
.. automodule:: GPy.kern.parts.gibbs
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.hetero module
----------------------------
.. automodule:: GPy.kern.parts.hetero
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.hierarchical module
----------------------------------
.. automodule:: GPy.kern.parts.hierarchical
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.independent_outputs module
-----------------------------------------
.. automodule:: GPy.kern.parts.independent_outputs
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.kernpart module
------------------------------
.. automodule:: GPy.kern.parts.kernpart
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.linear module
----------------------------
.. automodule:: GPy.kern.parts.linear
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.mlp module
-------------------------
.. automodule:: GPy.kern.parts.mlp
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.periodic_Matern32 module
---------------------------------------
.. automodule:: GPy.kern.parts.periodic_Matern32
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.periodic_Matern52 module
---------------------------------------
.. automodule:: GPy.kern.parts.periodic_Matern52
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.periodic_exponential module
------------------------------------------
.. automodule:: GPy.kern.parts.periodic_exponential
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.poly module
--------------------------
.. automodule:: GPy.kern.parts.poly
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.prod module
--------------------------
.. automodule:: GPy.kern.parts.prod
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.prod_orthogonal module
-------------------------------------
.. automodule:: GPy.kern.parts.prod_orthogonal
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.rational_quadratic module
----------------------------------------
.. automodule:: GPy.kern.parts.rational_quadratic
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.rbf module
-------------------------
.. automodule:: GPy.kern.parts.rbf
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.rbf_inv module
-----------------------------
.. automodule:: GPy.kern.parts.rbf_inv
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.rbfcos module
----------------------------
.. automodule:: GPy.kern.parts.rbfcos
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.spline module
----------------------------
.. automodule:: GPy.kern.parts.spline
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.symmetric module
-------------------------------
.. automodule:: GPy.kern.parts.symmetric
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.sympykern module
-------------------------------
.. automodule:: GPy.kern.parts.sympykern
:members:
:undoc-members:
:show-inheritance:
GPy.kern.parts.white module
---------------------------
.. automodule:: GPy.kern.parts.white
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: GPy.kern.parts
:members:
:undoc-members:
:show-inheritance:

View file

@ -1,29 +1,5 @@
kern Package GPy.kern package
============ ================
:mod:`kern` Package
-------------------
.. automodule:: GPy.kern
:members:
:undoc-members:
:show-inheritance:
:mod:`constructors` Module
--------------------------
.. automodule:: GPy.kern.constructors
:members:
:undoc-members:
:show-inheritance:
:mod:`kern` Module
------------------
.. automodule:: GPy.kern.kern
:members:
:undoc-members:
:show-inheritance:
Subpackages Subpackages
----------- -----------
@ -32,3 +8,30 @@ Subpackages
GPy.kern.parts GPy.kern.parts
Submodules
----------
GPy.kern.constructors module
----------------------------
.. automodule:: GPy.kern.constructors
:members:
:undoc-members:
:show-inheritance:
GPy.kern.kern module
--------------------
.. automodule:: GPy.kern.kern
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: GPy.kern
:members:
:undoc-members:
:show-inheritance:

View file

@ -0,0 +1,70 @@
GPy.likelihoods.noise_models package
====================================
Submodules
----------
GPy.likelihoods.noise_models.binomial_noise module
--------------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.binomial_noise
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.noise_models.exponential_noise module
-----------------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.exponential_noise
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.noise_models.gamma_noise module
-----------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.gamma_noise
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.noise_models.gaussian_noise module
--------------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.gaussian_noise
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.noise_models.gp_transformations module
------------------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.gp_transformations
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.noise_models.noise_distributions module
-------------------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.noise_distributions
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.noise_models.poisson_noise module
-------------------------------------------------
.. automodule:: GPy.likelihoods.noise_models.poisson_noise
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: GPy.likelihoods.noise_models
:members:
:undoc-members:
:show-inheritance:

View file

@ -1,51 +1,69 @@
likelihoods Package GPy.likelihoods package
=================== =======================
:mod:`likelihoods` Package Subpackages
-------------------------- -----------
.. automodule:: GPy.likelihoods .. toctree::
:members:
:undoc-members:
:show-inheritance:
:mod:`ep` Module GPy.likelihoods.noise_models
----------------
Submodules
----------
GPy.likelihoods.ep module
-------------------------
.. automodule:: GPy.likelihoods.ep .. automodule:: GPy.likelihoods.ep
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`gaussian` Module GPy.likelihoods.ep_mixed_noise module
---------------------- -------------------------------------
.. automodule:: GPy.likelihoods.ep_mixed_noise
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.gaussian module
-------------------------------
.. automodule:: GPy.likelihoods.gaussian .. automodule:: GPy.likelihoods.gaussian
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`likelihood` Module GPy.likelihoods.gaussian_mixed_noise module
------------------------ -------------------------------------------
.. automodule:: GPy.likelihoods.gaussian_mixed_noise
:members:
:undoc-members:
:show-inheritance:
GPy.likelihoods.likelihood module
---------------------------------
.. automodule:: GPy.likelihoods.likelihood .. automodule:: GPy.likelihoods.likelihood
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`likelihood_functions` Module GPy.likelihoods.noise_model_constructors module
---------------------------------- -----------------------------------------------
.. automodule:: GPy.likelihoods.likelihood_functions .. automodule:: GPy.likelihoods.noise_model_constructors
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`link_functions` Module
----------------------------
.. automodule:: GPy.likelihoods.link_functions Module contents
---------------
.. automodule:: GPy.likelihoods
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:

38
doc/GPy.mappings.rst Normal file
View file

@ -0,0 +1,38 @@
GPy.mappings package
====================
Submodules
----------
GPy.mappings.kernel module
--------------------------
.. automodule:: GPy.mappings.kernel
:members:
:undoc-members:
:show-inheritance:
GPy.mappings.linear module
--------------------------
.. automodule:: GPy.mappings.linear
:members:
:undoc-members:
:show-inheritance:
GPy.mappings.mlp module
-----------------------
.. automodule:: GPy.mappings.mlp
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: GPy.mappings
:members:
:undoc-members:
:show-inheritance:

View file

@ -1,99 +1,134 @@
models Package GPy.models package
============== ==================
:mod:`models` Package Submodules
--------------------- ----------
.. automodule:: GPy.models GPy.models.bayesian_gplvm module
:members: --------------------------------
:undoc-members:
:show-inheritance:
:mod:`bayesian_gplvm` Module
----------------------------
.. automodule:: GPy.models.bayesian_gplvm .. automodule:: GPy.models.bayesian_gplvm
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`fitc_classification` Module GPy.models.bcgplvm module
--------------------------------- -------------------------
.. automodule:: GPy.models.bcgplvm
:members:
:undoc-members:
:show-inheritance:
GPy.models.fitc_classification module
-------------------------------------
.. automodule:: GPy.models.fitc_classification .. automodule:: GPy.models.fitc_classification
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`gp_classification` Module GPy.models.gp_classification module
------------------------------- -----------------------------------
.. automodule:: GPy.models.gp_classification .. automodule:: GPy.models.gp_classification
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`gp_regression` Module GPy.models.gp_multioutput_regression module
--------------------------- -------------------------------------------
.. automodule:: GPy.models.gp_multioutput_regression
:members:
:undoc-members:
:show-inheritance:
GPy.models.gp_regression module
-------------------------------
.. automodule:: GPy.models.gp_regression .. automodule:: GPy.models.gp_regression
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`gplvm` Module GPy.models.gplvm module
------------------- -----------------------
.. automodule:: GPy.models.gplvm .. automodule:: GPy.models.gplvm
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`mrd` Module GPy.models.gradient_checker module
----------------- ----------------------------------
.. automodule:: GPy.models.gradient_checker
:members:
:undoc-members:
:show-inheritance:
GPy.models.mrd module
---------------------
.. automodule:: GPy.models.mrd .. automodule:: GPy.models.mrd
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`sparse_gp_classification` Module GPy.models.sparse_gp_classification module
-------------------------------------- ------------------------------------------
.. automodule:: GPy.models.sparse_gp_classification .. automodule:: GPy.models.sparse_gp_classification
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`sparse_gp_regression` Module GPy.models.sparse_gp_multioutput_regression module
---------------------------------- --------------------------------------------------
.. automodule:: GPy.models.sparse_gp_multioutput_regression
:members:
:undoc-members:
:show-inheritance:
GPy.models.sparse_gp_regression module
--------------------------------------
.. automodule:: GPy.models.sparse_gp_regression .. automodule:: GPy.models.sparse_gp_regression
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`sparse_gplvm` Module GPy.models.sparse_gplvm module
-------------------------- ------------------------------
.. automodule:: GPy.models.sparse_gplvm .. automodule:: GPy.models.sparse_gplvm
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`svigp_regression` Module GPy.models.svigp_regression module
------------------------------ ----------------------------------
.. automodule:: GPy.models.svigp_regression .. automodule:: GPy.models.svigp_regression
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`warped_gp` Module GPy.models.warped_gp module
----------------------- ---------------------------
.. automodule:: GPy.models.warped_gp .. automodule:: GPy.models.warped_gp
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
Module contents
---------------
.. automodule:: GPy.models
:members:
:undoc-members:
:show-inheritance:

View file

@ -1,14 +1,6 @@
GPy Package GPy package
=========== ===========
:mod:`GPy` Package
------------------
.. automodule:: GPy.__init__
:members:
:undoc-members:
:show-inheritance:
Subpackages Subpackages
----------- -----------
@ -19,7 +11,15 @@ Subpackages
GPy.inference GPy.inference
GPy.kern GPy.kern
GPy.likelihoods GPy.likelihoods
GPy.mappings
GPy.models GPy.models
GPy.testing GPy.testing
GPy.util GPy.util
Module contents
---------------
.. automodule:: GPy
:members:
:undoc-members:
:show-inheritance:

View file

@ -1,107 +1,110 @@
testing Package GPy.testing package
=============== ===================
:mod:`testing` Package Submodules
---------------------- ----------
.. automodule:: GPy.testing GPy.testing.bgplvm_tests module
:members: -------------------------------
:undoc-members:
:show-inheritance:
:mod:`bgplvm_tests` Module
--------------------------
.. automodule:: GPy.testing.bgplvm_tests .. automodule:: GPy.testing.bgplvm_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`cgd_tests` Module GPy.testing.cgd_tests module
----------------------- ----------------------------
.. automodule:: GPy.testing.cgd_tests .. automodule:: GPy.testing.cgd_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`checkgrad` Module GPy.testing.examples_tests module
----------------------- ---------------------------------
.. automodule:: GPy.testing.checkgrad
:members:
:undoc-members:
:show-inheritance:
:mod:`examples_tests` Module
----------------------------
.. automodule:: GPy.testing.examples_tests .. automodule:: GPy.testing.examples_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`gplvm_tests` Module GPy.testing.gplvm_tests module
------------------------- ------------------------------
.. automodule:: GPy.testing.gplvm_tests .. automodule:: GPy.testing.gplvm_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`kernel_tests` Module GPy.testing.kernel_tests module
-------------------------- -------------------------------
.. automodule:: GPy.testing.kernel_tests .. automodule:: GPy.testing.kernel_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`mrd_tests` Module GPy.testing.mapping_tests module
----------------------- --------------------------------
.. automodule:: GPy.testing.mapping_tests
:members:
:undoc-members:
:show-inheritance:
GPy.testing.mrd_tests module
----------------------------
.. automodule:: GPy.testing.mrd_tests .. automodule:: GPy.testing.mrd_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`prior_tests` Module GPy.testing.prior_tests module
------------------------- ------------------------------
.. automodule:: GPy.testing.prior_tests .. automodule:: GPy.testing.prior_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`psi_stat_expactation_tests` Module GPy.testing.psi_stat_expectation_tests module
---------------------------------------- ---------------------------------------------
.. automodule:: GPy.testing.psi_stat_expactation_tests .. automodule:: GPy.testing.psi_stat_expectation_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`psi_stat_gradient_tests` Module GPy.testing.psi_stat_gradient_tests module
------------------------------------- ------------------------------------------
.. automodule:: GPy.testing.psi_stat_gradient_tests .. automodule:: GPy.testing.psi_stat_gradient_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`sparse_gplvm_tests` Module GPy.testing.sparse_gplvm_tests module
-------------------------------- -------------------------------------
.. automodule:: GPy.testing.sparse_gplvm_tests .. automodule:: GPy.testing.sparse_gplvm_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
:mod:`unit_tests` Module GPy.testing.unit_tests module
------------------------ -----------------------------
.. automodule:: GPy.testing.unit_tests .. automodule:: GPy.testing.unit_tests
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
Module contents
---------------
.. automodule:: GPy.testing
:members:
:undoc-members:
:show-inheritance:

View file

@ -0,0 +1,30 @@
GPy.util.latent_space_visualizations.controllers package
========================================================
Submodules
----------
GPy.util.latent_space_visualizations.controllers.axis_event_controller module
-----------------------------------------------------------------------------
.. automodule:: GPy.util.latent_space_visualizations.controllers.axis_event_controller
:members:
:undoc-members:
:show-inheritance:
GPy.util.latent_space_visualizations.controllers.imshow_controller module
-------------------------------------------------------------------------
.. automodule:: GPy.util.latent_space_visualizations.controllers.imshow_controller
:members:
:undoc-members:
:show-inheritance:
Module contents
---------------
.. automodule:: GPy.util.latent_space_visualizations.controllers
:members:
:undoc-members:
:show-inheritance:

Some files were not shown because too many files have changed in this diff Show more