mirror of https://github.com/SheffieldML/GPy.git
synced 2026-05-09 12:02:38 +02:00

Merge branch 'linK_functions2' into devel

Conflicts: GPy/core/gp.py GPy/core/gp_base.py GPy/core/sparse_gp.py GPy/examples/regression.py GPy/kern/constructors.py GPy/kern/parts/coregionalise.py GPy/models/__init__.py GPy/models/sparse_gp_classification.py GPy/util/__init__.py

Commit f8c9e6b982: 36 changed files with 2144 additions and 404 deletions
@@ -140,7 +140,6 @@ class FITC(SparseGP):
 dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
 dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
-dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
 dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
 alpha = mdot(LBiLmipsi1,self.V_star)
@@ -19,9 +19,6 @@ class GP(GPBase):
 :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
 :type normalize_X: False|True
 :rtype: model object
-:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
-:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
-:type powerep: list

 .. Note:: Multiple independent outputs are allowed using columns of Y
@@ -105,7 +102,12 @@ class GP(GPBase):
 Note, we use the chain rule: dL_dtheta = dL_dK * dK_dtheta
 """
-return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+if not isinstance(self.likelihood,EP):
+    tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+else:
+    tmp = np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+return tmp

 def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
 """
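The chain rule noted above reduces to an elementwise product summed over all entries of the kernel matrix. A minimal NumPy sketch, assuming an RBF kernel and a precomputed dL_dK; the function below is an illustration, not GPy's shipped code:

import numpy as np

def dL_dvariance(dL_dK, X, variance, lengthscale):
    # illustrative stand-in: dK/dvariance for k = variance * exp(-0.5 r^2 / l^2)
    sq_dist = np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=-1)
    dK_dvariance = np.exp(-0.5 * sq_dist / lengthscale ** 2)
    # chain rule: dL_dtheta = sum_ij dL_dK[i, j] * dK_dtheta[i, j]
    return np.sum(dL_dK * dK_dvariance)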
@@ -136,7 +138,7 @@ class GP(GPBase):
 :type Xnew: np.ndarray, Nnew x self.input_dim
 :param which_parts: specifies which outputs kernel(s) to use in prediction
 :type which_parts: ('all', list of bools)
-:param full_cov: whether to return the folll covariance matrix, or just the diagonal
+:param full_cov: whether to return the full covariance matrix, or just the diagonal
 :type full_cov: bool
 :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
 :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@@ -153,5 +155,71 @@ class GP(GPBase):
 # now push through likelihood
 mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
 return mean, var, _025pm, _975pm

+def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+    """
+    For a specific output, predict the function at the new point(s) Xnew.
+
+    Arguments
+    ---------
+    :param Xnew: The points at which to make a prediction
+    :type Xnew: np.ndarray, Nnew x self.input_dim
+    :param output: output to predict
+    :type output: integer in {0,..., num_outputs-1}
+    :param which_parts: specifies which outputs kernel(s) to use in prediction
+    :type which_parts: ('all', list of bools)
+    :param full_cov: whether to return the full covariance matrix, or just the diagonal
+    :type full_cov: bool
+    :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
+    :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+    :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+
+    .. Note:: For multiple output models only
+    """
+    assert hasattr(self,'multioutput')
+    index = np.ones_like(Xnew)*output
+    Xnew = np.hstack((Xnew,index))
+
+    # normalize X values
+    Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+    mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+
+    # now push through likelihood
+    mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
+    return mean, var, _025pm, _975pm
+
+def _raw_predict_single_output(self, _Xnew, output=0, which_parts='all', full_cov=False, stop=False):
+    """
+    Internal helper function for making predictions for a specific output,
+    does not account for normalization or likelihood
+    ---------
+    :param Xnew: The points at which to make a prediction
+    :type Xnew: np.ndarray, Nnew x self.input_dim
+    :param output: output to predict
+    :type output: integer in {0,..., num_outputs-1}
+    :param which_parts: specifies which outputs kernel(s) to use in prediction
+    :type which_parts: ('all', list of bools)
+    :param full_cov: whether to return the full covariance matrix, or just the diagonal
+
+    .. Note:: For multiple output models only
+    """
+    assert hasattr(self,'multioutput')
+
+    # creates an index column and appends it to _Xnew
+    index = np.ones_like(_Xnew)*output
+    _Xnew = np.hstack((_Xnew,index))
+
+    Kx = self.kern.K(_Xnew,self.X,which_parts=which_parts).T
+    KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
+    mu = np.dot(KiKx.T, self.likelihood.Y)
+    if full_cov:
+        Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+        var = Kxx - np.dot(KiKx.T, Kx)
+    else:
+        Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+        var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
+        var = var[:, None]
+    if stop:
+        debug_this # @UndefinedVariable
+    return mu, var
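The helper added above is the standard GP predictive computation with an output-index column appended to the inputs: mu* = Kx^T K^{-1} y and var* = Kxx - Kx^T K^{-1} Kx, solved through the Cholesky factor (which is what the dpotrs call does). A self-contained sketch of the same algebra, with illustrative names rather than GPy's API:

import numpy as np
from scipy.linalg import cho_solve

def raw_predict(L, X, Y, Xnew, kern):
    # L: lower Cholesky factor of K(X, X); kern(A, B): cross-covariance
    Kx = kern(X, Xnew)                        # N x Nnew
    KiKx = cho_solve((L, True), Kx)           # K^{-1} Kx, as dpotrs does above
    mu = KiKx.T.dot(Y)                        # posterior mean
    var = kern(Xnew, Xnew) - KiKx.T.dot(Kx)   # posterior covariance
    return mu, var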
@@ -57,18 +57,12 @@ class GPBase(Model):
 self.X = state.pop()
 Model.setstate(self, state)

-def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None):
+def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False, fignum=None, ax=None, output=None):
 """
 Plot the GP's view of the world, where the data is normalized and the
-likelihood is Gaussian.
-
-Plot the posterior of the GP.
 - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
 - In two dimensions, a contour-plot shows the mean predicted function
-- In higher dimensions, we've not implemented this yet !TODO!
+- Not implemented in higher dimensions
-
-Can plot only part of the data and part of the posterior functions
-using which_data and which_functions

 :param samples: the number of a posteriori samples to plot
 :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
@@ -85,7 +79,9 @@ class GPBase(Model):
 :param ax: axes to plot on.
 :type ax: axes handle

-"""
+:param output: which output to plot (for multiple output models only)
+:type output: integer (first output is 0)
+"""
 if which_data == 'all':
     which_data = slice(None)
@@ -93,7 +89,7 @@ class GPBase(Model):
 fig = pb.figure(num=fignum)
 ax = fig.add_subplot(111)

-if self.X.shape[1] == 1:
+if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
     Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
     if samples == 0:
         m, v = self._raw_predict(Xnew, which_parts=which_parts)
@@ -111,7 +107,7 @@ class GPBase(Model):
 ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
 ax.set_ylim(ymin, ymax)

-elif self.X.shape[1] == 2:
+elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
     resolution = resolution or 50
     Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
     m, v = self._raw_predict(Xnew, which_parts=which_parts)
@@ -120,17 +116,51 @@ class GPBase(Model):
     ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
     ax.set_xlim(xmin[0], xmax[0])
     ax.set_ylim(xmin[1], xmax[1])

+elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+    output -= 1
+    assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs
+    Xu = self.X[self.X[:,-1]==output ,0:1]
+    Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+
+    if samples == 0:
+        m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts)
+        gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v), axes=ax)
+        ax.plot(Xu[which_data], self.likelihood.Y[self.likelihood.index==output][:,None], 'kx', mew=1.5)
+    else:
+        m, v = self._raw_predict_single_output(Xnew, output=output, which_parts=which_parts, full_cov=True)
+        Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
+        gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None], axes=ax)
+        for i in range(samples):
+            ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
+    ax.set_xlim(xmin, xmax)
+    ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
+    ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+    ax.set_ylim(ymin, ymax)
+
+    if hasattr(self,'Z'):
+        Zu = self.Z[self.Z[:,-1]==output,:]
+        Zu = self.Z * self._Xscale + self._Xoffset
+        Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
+        ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+
+elif self.X.shape[1] == 3 and hasattr(self,'multioutput'):
+    raise NotImplementedError, "Plots not implemented for multioutput models with 2D inputs...yet"
+    output -= 1
+    assert self.num_outputs >= output, 'The model has only %s outputs.' %self.num_outputs

 else:
     raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

-def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
+def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, output=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
 """
 Plot the GP with noise where the likelihood is Gaussian.

 Plot the posterior of the GP.
 - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
 - In two dimensions, a contour-plot shows the mean predicted function
-- In higher dimensions, we've not implemented this yet !TODO!
+- Not implemented in higher dimensions

 Can plot only part of the data and part of the posterior functions
 using which_data and which_functions
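A hypothetical usage sketch for the new output argument, where m stands for a coregionalised multiple-output model such as the ones built in the coregionalisation examples later in this diff (the docstring above documents the first output as 0):

m.plot_f(output=0)  # posterior over the latent function, first output only
m.plot(output=0)    # posterior with noise, first output only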
@@ -151,15 +181,13 @@ class GPBase(Model):
 :type fignum: figure number
 :param ax: axes to plot on.
 :type ax: axes handle
+:type output: integer (first output is 0)
 :param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input index i should be set to value v.
 :type fixed_inputs: a list of tuples
 :param linecol: color of line to plot.
 :type linecol:
 :param fillcol: color of fill
-:type fillcol:
-:param levels: for 2D plotting, the number of contour levels to use
-is ax is None, create a new figure
+:param levels: for 2D plotting, the number of contour levels to use; if ax is None, create a new figure
 """
 # TODO include samples
 if which_data == 'all':
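For illustration, fixed_inputs lets a higher-dimensional model be plotted along one free input; a hedged sketch for a model m with two input dimensions:

# pin input index 1 at the value 0.5 and plot over the remaining free dimension
m.plot(fixed_inputs=[(1, 0.5)])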
@@ -169,41 +197,81 @@ class GPBase(Model):
 fig = pb.figure(num=fignum)
 ax = fig.add_subplot(111)

-plotdims = self.input_dim - len(fixed_inputs)
-if plotdims == 1:
+if not hasattr(self,'multioutput'):
+    plotdims = self.input_dim - len(fixed_inputs)
+    if plotdims == 1:
+        resolution = resolution or 200
         Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
         fixed_dims = np.array([i for i,v in fixed_inputs])
         freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
         Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
         Xgrid = np.empty((Xnew.shape[0],self.input_dim))
         Xgrid[:,freedim] = Xnew
         for i,v in fixed_inputs:
             Xgrid[:,i] = v
         m, _, lower, upper = self.predict(Xgrid, which_parts=which_parts)
         for d in range(m.shape[1]):
             gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
             ax.plot(Xu[which_data,freedim], self.likelihood.data[which_data, d], 'kx', mew=1.5)
         ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
         ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
         ax.set_xlim(xmin, xmax)
         ax.set_ylim(ymin, ymax)
-elif self.X.shape[1] == 2: # FIXME
-    resolution = resolution or 50
-    Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
-    x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
-    m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
-    m = m.reshape(resolution, resolution).T
-    ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
-    Yf = self.likelihood.data.flatten()
-    ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
-    ax.set_xlim(xmin[0], xmax[0])
-    ax.set_ylim(xmin[1], xmax[1])
+        Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits,resolution=resolution)
+        m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
+        for d in range(m.shape[1]):
+            gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
+            ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
+        ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+        ax.set_xlim(xmin, xmax)
+        ax.set_ylim(ymin, ymax)
+    elif self.X.shape[1] == 2:
+        resolution = resolution or 50
+        Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
+        x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
+        m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
+        m = m.reshape(resolution, resolution).T
+        ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
+        Yf = self.likelihood.Y.flatten()
+        ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
+        ax.set_xlim(xmin[0], xmax[0])
+        ax.set_ylim(xmin[1], xmax[1])
+    else:
+        raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
 else:
-    raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+    assert self.num_outputs > output, 'The model has only %s outputs.' %self.num_outputs
+    if self.X.shape[1] == 2:
+        resolution = resolution or 200
+        Xu = self.X[self.X[:,-1]==output,:] #keep the output of interest
+        Xu = self.X * self._Xscale + self._Xoffset
+        Xu = self.X[self.X[:,-1]==output ,0:1] #get rid of the index column
+        Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
+        m, _, lower, upper = self.predict_single_output(Xnew, which_parts=which_parts,output=output)
+        for d in range(m.shape[1]):
+            gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
+            ax.plot(Xu[which_data], self.likelihood.noise_model_list[output].data, 'kx', mew=1.5)
+        ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
+        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
+        ax.set_xlim(xmin, xmax)
+        ax.set_ylim(ymin, ymax)
+    elif self.X.shape[1] == 3:
+        raise NotImplementedError, "Plots not yet implemented for multioutput models with 2D inputs"
+        resolution = resolution or 50
+    else:
+        raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
@@ -549,7 +549,7 @@ class Model(Parameterized):
 :type optimizer: string TODO: valid strings?
 """
-assert isinstance(self.likelihood, likelihoods.EP), "pseudo_EM is only available for EP likelihoods"
+assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
 ll_change = epsilon + 1.
 iteration = 0
 last_ll = -np.inf
@@ -5,7 +5,7 @@ import numpy as np
 import pylab as pb
 from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri
 from scipy import linalg
-from ..likelihoods import Gaussian
+from ..likelihoods import Gaussian, EP, EP_Mixed_Noise
 from gp_base import GPBase

 class SparseGP(GPBase):
@@ -109,7 +109,6 @@ class SparseGP(GPBase):
 tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
 self._A = tdot(tmp)
-
 # factor B
 self.B = np.eye(self.num_inducing) + self._A
 self.LB = jitchol(self.B)
@@ -139,6 +138,7 @@ class SparseGP(GPBase):
 dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
+
 if self.likelihood.is_heteroscedastic:

     if self.has_uncertain_inputs:
         self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
     else:
@@ -160,9 +160,23 @@ class SparseGP(GPBase):
     # save computation here.
     self.partial_for_likelihood = None
 elif self.likelihood.is_heteroscedastic:
-    raise NotImplementedError, "heteroscedastic derivatives not implemented"
+    if self.has_uncertain_inputs:
+        raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
+    else:
+        Lmi_psi1, nil = dtrtrs(self.Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
+        _LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
+        _Bi_Lmi_psi1, _ = dtrtrs(self.LB.T, np.asfortranarray(_LBi_Lmi_psi1), lower=1, trans=0)
+
+        self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
+        self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
+        self.partial_for_likelihood += 0.5*np.sum(_Bi_Lmi_psi1*Lmi_psi1,0)[:,None]*self.likelihood.precision**2 #NOTE this term has numerical issues
+        self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
+        self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
 else:
-    # likelihood is not heteroscedastic
+    # likelihood is not heteroscedastic
     self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
     self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
     self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
@@ -298,7 +312,7 @@ class SparseGP(GPBase):
 :type X_variance_new: np.ndarray, Nnew x self.input_dim
 :param which_parts: specifies which outputs kernel(s) to use in prediction
 :type which_parts: ('all', list of bools)
-:param full_cov: whether to return the folll covariance matrix, or just the diagonal
+:param full_cov: whether to return the full covariance matrix, or just the diagonal
 :type full_cov: bool
 :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
 :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
@@ -322,18 +336,15 @@ class SparseGP(GPBase):
 return mean, var, _025pm, _975pm

-def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None):
+def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, fignum=None, ax=None, output=None):
 if ax is None:
     fig = pb.figure(num=fignum)
     ax = fig.add_subplot(111)
 if which_data is 'all':
     which_data = slice(None)

-GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax)
+GPBase.plot(self, samples=0, plot_limits=plot_limits, which_data='all', which_parts='all', resolution=None, levels=20, ax=ax, output=output)
-# add the inducing inputs and some errorbars
-if self.X.shape[1] == 1:
+if self.X.shape[1] == 1 and not hasattr(self,'multioutput'):
     if self.has_uncertain_inputs:
         Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
         ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
@@ -342,6 +353,109 @@ class SparseGP(GPBase):
     Zu = self.Z * self._Xscale + self._Xoffset
     ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)

-elif self.X.shape[1] == 2:
+elif self.X.shape[1] == 2 and not hasattr(self,'multioutput'):
     Zu = self.Z * self._Xscale + self._Xoffset
     ax.plot(Zu[:, 0], Zu[:, 1], 'wo')

+elif self.X.shape[1] == 2 and hasattr(self,'multioutput'):
+    Xu = self.X[self.X[:,-1]==output,:]
+    if self.has_uncertain_inputs:
+        Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
+        Xu = self.X[self.X[:,-1]==output ,0:1] #??
+        ax.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                    xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                    ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+    Zu = self.Z[self.Z[:,-1]==output,:]
+    Zu = self.Z * self._Xscale + self._Xoffset
+    Zu = self.Z[self.Z[:,-1]==output ,0:1] #??
+    ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
+    #ax.set_ylim(ax.get_ylim()[0],)
+
+else:
+    raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
+
+def predict_single_output(self, Xnew, output=0, which_parts='all', full_cov=False):
+    """
+    For a specific output, predict the function at the new point(s) Xnew.
+
+    Arguments
+    ---------
+    :param Xnew: The points at which to make a prediction
+    :type Xnew: np.ndarray, Nnew x self.input_dim
+    :param output: output to predict
+    :type output: integer in {0,..., num_outputs-1}
+    :param which_parts: specifies which outputs kernel(s) to use in prediction
+    :type which_parts: ('all', list of bools)
+    :param full_cov: whether to return the full covariance matrix, or just the diagonal
+    :type full_cov: bool
+    :rtype: posterior mean, a Numpy array, Nnew x self.input_dim
+    :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+    :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+
+    .. Note:: For multiple output models only
+    """
+    assert hasattr(self,'multioutput')
+    index = np.ones_like(Xnew)*output
+    Xnew = np.hstack((Xnew,index))
+
+    # normalize X values
+    Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
+    mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+
+    # now push through likelihood
+    mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, noise_model = output)
+    return mean, var, _025pm, _975pm
+
+def _raw_predict_single_output(self, _Xnew, output=0, X_variance_new=None, which_parts='all', full_cov=False, stop=False):
+    """
+    Internal helper function for making predictions for a specific output,
+    does not account for normalization or likelihood
+    ---------
+    :param Xnew: The points at which to make a prediction
+    :type Xnew: np.ndarray, Nnew x self.input_dim
+    :param output: output to predict
+    :type output: integer in {0,..., num_outputs-1}
+    :param which_parts: specifies which outputs kernel(s) to use in prediction
+    :type which_parts: ('all', list of bools)
+    :param full_cov: whether to return the full covariance matrix, or just the diagonal
+
+    .. Note:: For multiple output models only
+    """
+    Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work!
+    symmetrify(Bi)
+    Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi)
+
+    if self.Cpsi1V is None:
+        psi1V = np.dot(self.psi1.T,self.likelihood.V)
+        tmp, _ = dtrtrs(self.Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+        tmp, _ = dpotrs(self.LB, tmp, lower=1)
+        self.Cpsi1V, _ = dtrtrs(self.Lm, tmp, lower=1, trans=1)
+
+    assert hasattr(self,'multioutput')
+    index = np.ones_like(_Xnew)*output
+    _Xnew = np.hstack((_Xnew,index))
+
+    if X_variance_new is None:
+        Kx = self.kern.K(self.Z, _Xnew, which_parts=which_parts)
+        mu = np.dot(Kx.T, self.Cpsi1V)
+        if full_cov:
+            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+            var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting
+        else:
+            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+            var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0)
+    else:
+        Kx = self.kern.psi1(self.Z, _Xnew, X_variance_new)
+        mu = np.dot(Kx, self.Cpsi1V)
+        if full_cov:
+            raise NotImplementedError, "TODO"
+        else:
+            Kxx = self.kern.psi0(self.Z, _Xnew, X_variance_new)
+            psi2 = self.kern.psi2(self.Z, _Xnew, X_variance_new)
+            var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
+
+    return mu, var[:, None]
@@ -22,8 +22,8 @@ def coregionalisation_toy2(max_iters=100):
 Y = np.vstack((Y1, Y2))

 k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
-k2 = GPy.kern.coregionalise(2, 1)
+k2 = GPy.kern.coregionalise(2,1)
-k = k1**k2
+k = k1**k2 #k = k1.prod(k2,tensor=True)
 m = GPy.models.GPRegression(X, Y, kernel=k)
 m.constrain_fixed('.*rbf_var', 1.)
 # m.constrain_positive('.*kappa')
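For context, a sketch of the data layout these coregionalisation examples rely on: an integer index column appended to X marks which output each row belongs to, the same convention predict_single_output uses earlier in this diff. The array shapes below are made up for illustration:

import numpy as np

X1, X2 = np.random.rand(10, 1), np.random.rand(12, 1)   # inputs per output
X = np.vstack((np.hstack((X1, np.zeros_like(X1))),      # rows of output 0
               np.hstack((X2, np.ones_like(X2)))))      # rows of output 1
# X[:, -1] is the output index the coregionalise part keys on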
@@ -90,7 +90,6 @@ def coregionalisation_sparse(max_iters=100):
 k1 = GPy.kern.rbf(1)
 k2 = GPy.kern.coregionalise(2, 2)
 k = k1**k2 #.prod(k2, tensor=True) # + GPy.kern.white(2,0.001)
-
 m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
 m.constrain_fixed('.*rbf_var', 1.)
 m.constrain_fixed('iip')
@@ -401,8 +400,6 @@ def silhouette(max_iters=100):
 print(m)
 return m
-
-
 def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
@@ -74,9 +74,9 @@ def gibbs(input_dim,variance=1., mapping=None):
 Gibbs and MacKay non-stationary covariance function.

 .. math::

    r = sqrt((x_i - x_j)'*(x_i - x_j))

    k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))

    Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x'))}

@@ -90,7 +90,7 @@ def gibbs(input_dim,variance=1., mapping=None):
 The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x), which is a function that can be specified by the user; by default a multi-layer perceptron is used.

 :param input_dim: the number of input dimensions
 :type input_dim: int
 :param variance: the variance :math:`\sigma^2`
 :type variance: float
 :param mapping: the mapping that gives the lengthscale across the input space.
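A minimal NumPy sketch of the Gibbs covariance above for 1D inputs; lengthscale_fn is a hypothetical stand-in for the mapping object that supplies l(x):

import numpy as np

def gibbs_K(X, X2, variance, lengthscale_fn):
    l1 = lengthscale_fn(X)               # shape (N, 1)
    l2 = lengthscale_fn(X2)              # shape (M, 1)
    r2 = (X - X2.T) ** 2                 # squared distances, (N, M)
    denom = l1 ** 2 + (l2 ** 2).T        # l(x)^2 + l(x')^2
    Z = np.sqrt(2.0 * l1 * l2.T / denom)
    return variance * Z * np.exp(-r2 / denom)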
@@ -340,29 +340,30 @@ def symmetric(k):
 k_.parts = [symmetric.Symmetric(p) for p in k.parts]
 return k_

-def coregionalise(output_dim, rank=1, W=None, kappa=None):
+def coregionalise(num_outputs, W_columns=1, W=None, kappa=None):
 """
-Coregionalisation kernel.
-
-Used for computing covariance functions of the form
-.. math::
-   k_2(x, y)=\mathbf{B} k(x, y)
-where
+Coregionalization matrix B, of the form:

 .. math::
    \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}

-:param output_dim: the number of output dimensions
-:type output_dim: int
-:param rank: the rank of the coregionalisation matrix.
-:type rank: int
-:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B.
-:type W: ndarray
-:param kappa: a diagonal term which allows the outputs to behave independently.
+An intrinsic/linear coregionalization kernel of the form
+.. math::
+   k_2(x, y)=\mathbf{B} k(x, y)
+is obtained as the tensor product between a kernel k(x,y) and B.
+
+:param num_outputs: the number of outputs to coregionalise
+:type num_outputs: int
+:param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
+:type W_columns: int
+:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B
+:type W: numpy array of dimensionality (num_outputs, W_columns)
+:param kappa: a vector which allows the outputs to behave independently
+:type kappa: numpy array of dimensionality (num_outputs,)
 :rtype: kernel object

+.. Note: see coregionalisation examples in GPy.examples.regression for some usage.
 """
-p = parts.coregionalise.Coregionalise(output_dim,rank,W,kappa)
+p = parts.coregionalise.Coregionalise(num_outputs,W_columns,W,kappa)
 return kern(1,[p])
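A small numerical sketch of the coregionalisation matrix defined above, with made-up values (2 outputs, one column of W):

import numpy as np

W = np.array([[0.9], [0.6]])      # correlations between the outputs
kappa = np.array([0.1, 0.2])      # independent (diagonal) behaviour
B = np.dot(W, W.T) + np.diag(kappa)
# B[i, j] scales the base kernel k(x, y) for outputs i and j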
@@ -9,31 +9,34 @@ from scipy import weave
 class Coregionalise(Kernpart):
 """
-Coregionalisation kernel.
-
-Used for computing covariance functions of the form
+Kernel for intrinsic/linear coregionalization models
+
+This kernel has the form
 .. math::
-   k_2(x, y)=B k(x, y)
-where
-.. math::
-   B = WW^\top + diag(kappa)
+   \mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}

-:param output_dim: the number of output dimensions
-:type output_dim: int
-:param rank: the rank of the coregionalisation matrix.
-:type rank: int
-:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B.
-:type W: ndarray
-:param kappa: a diagonal term which allows the outputs to behave independently.
-:rtype: kernel object
+An intrinsic/linear coregionalization kernel of the form
+.. math::
+   k_2(x, y)=\mathbf{B} k(x, y)
+is obtained as the tensor product between a kernel k(x,y) and B.
+
+:param num_outputs: number of outputs to coregionalize
+:type num_outputs: int
+:param W_columns: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
+:type W_columns: int
+:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalisation matrix B
+:type W: numpy array of dimensionality (num_outputs, W_columns)
+:param kappa: a vector which allows the outputs to behave independently
+:type kappa: numpy array of dimensionality (num_outputs,)

 .. Note: see coregionalisation examples in GPy.examples.regression for some usage.
 """
-def __init__(self,output_dim,rank=1, W=None, kappa=None):
+def __init__(self,num_outputs,W_columns=1, W=None, kappa=None):
     self.input_dim = 1
     self.name = 'coregion'
-    self.output_dim = output_dim
+    self.num_outputs = num_outputs
-    self.rank = rank
+    self.W_columns = W_columns
     if W is None:
         self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
     else:
@@ -52,12 +55,12 @@ class Coregionalise(Kernpart):
 def _set_params(self,x):
     assert x.size == self.num_params
-    self.kappa = x[-self.output_dim:]
+    self.kappa = x[-self.num_outputs:]
-    self.W = x[:-self.output_dim].reshape(self.output_dim,self.rank)
+    self.W = x[:-self.num_outputs].reshape(self.num_outputs,self.W_columns)
     self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)

 def _get_param_names(self):
-    return sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[]) + ['kappa_%i'%i for i in range(self.output_dim)]
+    return sum([['W%i_%i'%(i,j) for j in range(self.W_columns)] for i in range(self.num_outputs)],[]) + ['kappa_%i'%i for i in range(self.num_outputs)]

 def K(self,index,index2,target):
     index = np.asarray(index,dtype=np.int)
@@ -75,26 +78,26 @@ class Coregionalise(Kernpart):
 if index2 is None:
     code="""
     for(int i=0;i<N; i++){
-        target[i+i*N] += B[index[i]+output_dim*index[i]];
+        target[i+i*N] += B[index[i]+num_outputs*index[i]];
         for(int j=0; j<i; j++){
-            target[j+i*N] += B[index[i]+output_dim*index[j]];
+            target[j+i*N] += B[index[i]+num_outputs*index[j]];
             target[i+j*N] += target[j+i*N];
         }
     }
     """
-    N,B,output_dim = index.size, self.B, self.output_dim
+    N,B,num_outputs = index.size, self.B, self.num_outputs
-    weave.inline(code,['target','index','N','B','output_dim'])
+    weave.inline(code,['target','index','N','B','num_outputs'])
 else:
     index2 = np.asarray(index2,dtype=np.int)
     code="""
     for(int i=0;i<num_inducing; i++){
         for(int j=0; j<N; j++){
-            target[i+j*num_inducing] += B[output_dim*index[j]+index2[i]];
+            target[i+j*num_inducing] += B[num_outputs*index[j]+index2[i]];
         }
     }
     """
-    N,num_inducing,B,output_dim = index.size,index2.size, self.B, self.output_dim
+    N,num_inducing,B,num_outputs = index.size,index2.size, self.B, self.num_outputs
-    weave.inline(code,['target','index','index2','N','num_inducing','B','output_dim'])
+    weave.inline(code,['target','index','index2','N','num_inducing','B','num_outputs'])

 def Kdiag(self,index,target):
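The inlined C above amounts to looking up entries of B by the output index of each data point. A NumPy equivalent, shown only as an illustration of the computation (not the shipped implementation):

import numpy as np

def coregionalise_K(B, index, index2=None):
    # K[i, j] = B[output of point i, output of point j]
    index = np.asarray(index, dtype=int).flatten()
    index2 = index if index2 is None else np.asarray(index2, dtype=int).flatten()
    return B[np.ix_(index, index2)]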
@@ -111,12 +114,12 @@ class Coregionalise(Kernpart):
 code="""
 for(int i=0; i<num_inducing; i++){
     for(int j=0; j<N; j++){
-        dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
+        dL_dK_small[index[j] + num_outputs*index2[i]] += dL_dK[i+j*num_inducing];
     }
 }
 """
-N, num_inducing, output_dim = index.size, index2.size, self.output_dim
+N, num_inducing, num_outputs = index.size, index2.size, self.num_outputs
-weave.inline(code, ['N','num_inducing','output_dim','dL_dK','dL_dK_small','index','index2'])
+weave.inline(code, ['N','num_inducing','num_outputs','dL_dK','dL_dK_small','index','index2'])

 dkappa = np.diag(dL_dK_small)
 dL_dK_small += dL_dK_small.T
@@ -133,8 +136,8 @@ class Coregionalise(Kernpart):
 ii,jj = ii.T, jj.T

 dL_dK_small = np.zeros_like(self.B)
-for i in range(self.output_dim):
+for i in range(self.num_outputs):
-    for j in range(self.output_dim):
+    for j in range(self.num_outputs):
         tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
         dL_dK_small[i,j] = tmp

@@ -146,8 +149,8 @@ class Coregionalise(Kernpart):
 def dKdiag_dtheta(self,dL_dKdiag,index,target):
     index = np.asarray(index,dtype=np.int).flatten()
-    dL_dKdiag_small = np.zeros(self.output_dim)
+    dL_dKdiag_small = np.zeros(self.num_outputs)
-    for i in range(self.output_dim):
+    for i in range(self.num_outputs):
         dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
     dW = 2.*self.W*dL_dKdiag_small[:,None]
     dkappa = dL_dKdiag_small
@@ -155,6 +158,3 @@ class Coregionalise(Kernpart):
 def dK_dX(self,dL_dK,X,X2,target):
     pass
-
-
-
@@ -18,7 +18,7 @@ class Prod(Kernpart):
 """
 def __init__(self,k1,k2,tensor=False):
     self.num_params = k1.num_params + k2.num_params
-    self.name = k1.name + '<times>' + k2.name
+    self.name = '['+k1.name + '(x)' + k2.name +']'
     self.k1 = k1
     self.k2 = k2
     if tensor:
@@ -1,4 +1,7 @@
 from ep import EP
+from ep_mixed_noise import EP_Mixed_Noise
 from gaussian import Gaussian
+from gaussian_mixed_noise import Gaussian_Mixed_Noise
+from noise_model_constructors import *
 # TODO: from Laplace import Laplace
-import likelihood_functions as functions
@@ -4,23 +4,23 @@ from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
 from likelihood import likelihood

 class EP(likelihood):
-    def __init__(self,data,LikelihoodFunction,epsilon=1e-3,power_ep=[1.,1.]):
+    def __init__(self,data,noise_model,epsilon=1e-3,power_ep=[1.,1.]):
         """
         Expectation Propagation

         Arguments
         ---------
         epsilon : Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
-        LikelihoodFunction : a likelihood function (see likelihood_functions.py)
+        noise_model : a likelihood function (see likelihood_functions.py)
         """
-        self.LikelihoodFunction = LikelihoodFunction
+        self.noise_model = noise_model
         self.epsilon = epsilon
         self.eta, self.delta = power_ep
         self.data = data
         self.N, self.output_dim = self.data.shape
         self.is_heteroscedastic = True
         self.Nparams = 0
-        self._transf_data = self.LikelihoodFunction._preprocess_values(data)
+        self._transf_data = self.noise_model._preprocess_values(data)

         #Initial values - Likelihood approximation parameters:
         #p(y|f) = t(f|tau_tilde,v_tilde)
@@ -52,16 +52,23 @@ class EP(likelihood):
 def predictive_values(self,mu,var,full_cov):
     if full_cov:
         raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
-    return self.LikelihoodFunction.predictive_values(mu,var)
+    return self.noise_model.predictive_values(mu,var)

 def _get_params(self):
-    return np.zeros(0)
+    #return np.zeros(0)
+    return self.noise_model._get_params()

 def _get_param_names(self):
-    return []
+    #return []
+    return self.noise_model._get_param_names()

 def _set_params(self,p):
-    pass # TODO: the EP likelihood might want to take some parameters...
+    #pass # TODO: the EP likelihood might want to take some parameters...
+    self.noise_model._set_params(p)

 def _gradients(self,partial):
-    return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
+    #return np.zeros(0) # TODO: the EP likelihood might want to take some parameters...
+    return self.noise_model._gradients(partial)

 def _compute_GP_variables(self):
     #Variables to be called from GP
@ -116,7 +123,7 @@ class EP(likelihood):
|
||||||
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
|
self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
|
||||||
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
|
self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
|
||||||
#Marginal moments
|
#Marginal moments
|
||||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
||||||
#Site parameters update
|
#Site parameters update
|
||||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
||||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
||||||
|
|
@ -206,7 +213,7 @@ class EP(likelihood):
|
||||||
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
|
||||||
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
|
||||||
#Marginal moments
|
#Marginal moments
|
||||||
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
|
||||||
#Site parameters update
|
#Site parameters update
|
||||||
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
||||||
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
||||||
|
|
@@ -301,7 +308,7 @@ class EP(likelihood):
                 self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                 self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                 #Marginal moments
-                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.LikelihoodFunction.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
+                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model.moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
                 #Site parameters update
                 Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                 Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
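The three hunks above apply the same substitution (self.LikelihoodFunction -> self.noise_model) to the full, DTC and FITC variants of the EP inner loop. As a reading aid, the update each loop performs can be written on its own. The following is a standalone sketch (not code from this commit) of one power-EP site update for a probit factor, reusing the closed-form moments that appear later in this diff; eta and delta are the power-EP parameters:

import numpy as np
from scipy.stats import norm

def probit_moments(y, tau_cav, v_cav):
    # Moments of N(f | v_cav/tau_cav, 1/tau_cav) * Phi(y*f), with y in {-1, 1}
    z = y * v_cav / np.sqrt(tau_cav**2 + tau_cav)
    Z_hat = norm.cdf(z)
    phi = norm.pdf(z)
    mu_hat = v_cav/tau_cav + y*phi/(Z_hat*np.sqrt(tau_cav**2 + tau_cav))
    sigma2_hat = 1./tau_cav - (phi/((tau_cav**2 + tau_cav)*Z_hat))*(z + phi/Z_hat)
    return Z_hat, mu_hat, sigma2_hat

def ep_site_update(i, y, mu, Sigma, tau_tilde, v_tilde, eta=1., delta=1.):
    # Cavity parameters: remove a fraction eta of site i from the posterior marginal
    tau_cav = 1./Sigma[i, i] - eta*tau_tilde[i]
    v_cav = mu[i]/Sigma[i, i] - eta*v_tilde[i]
    # Moments of cavity x likelihood factor
    Z_hat, mu_hat, sigma2_hat = probit_moments(y[i], tau_cav, v_cav)
    # Damped site update, exactly the Delta_tau/Delta_v lines in the loops above
    Delta_tau = delta/eta*(1./sigma2_hat - 1./Sigma[i, i])
    Delta_v = delta/eta*(mu_hat/sigma2_hat - mu[i]/Sigma[i, i])
    tau_tilde[i] += Delta_tau
    v_tilde[i] += Delta_v
    return Delta_tau, Delta_v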
GPy/likelihoods/ep_mixed_noise.py (new file, 385 lines)
@@ -0,0 +1,385 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood

class EP_Mixed_Noise(likelihood):
    def __init__(self,data_list,noise_model_list,epsilon=1e-3,power_ep=[1.,1.]):
        """
        Expectation Propagation

        Arguments
        ---------
        :param data_list: list of outputs
        :param noise_model_list: a list of noise models
        :param epsilon: convergence criterion, maximum squared difference allowed between mean updates to stop iterations
        :type epsilon: float
        :param power_ep: list of power EP parameters
        """
        assert len(data_list) == len(noise_model_list)
        self.noise_model_list = noise_model_list
        n_list = [data.size for data in data_list]
        self.n_models = len(data_list)
        self.n_params = [noise_model._get_params().size for noise_model in noise_model_list]
        self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.n_models),n_list)])
        self.epsilon = epsilon
        self.eta, self.delta = power_ep
        self.data = np.vstack(data_list)
        self.N, self.output_dim = self.data.shape
        self.is_heteroscedastic = True
        self.Nparams = 0 #FIXME
        self._transf_data = np.vstack([noise_model._preprocess_values(data) for noise_model,data in zip(noise_model_list,data_list)])
        #TODO non-gaussian index

        #Initial values - Likelihood approximation parameters:
        #p(y|f) = t(f|tau_tilde,v_tilde)
        self.tau_tilde = np.zeros(self.N)
        self.v_tilde = np.zeros(self.N)

        #initial values for the GP variables
        self.Y = np.zeros((self.N,1))
        self.covariance_matrix = np.eye(self.N)
        self.precision = np.ones(self.N)[:,None]
        self.Z = 0
        self.YYT = None
        self.V = self.precision * self.Y
        self.VVT_factor = self.V
        self.trYYT = 0.

    def restart(self):
        self.tau_tilde = np.zeros(self.N)
        self.v_tilde = np.zeros(self.N)
        self.Y = np.zeros((self.N,1))
        self.covariance_matrix = np.eye(self.N)
        self.precision = np.ones(self.N)[:,None]
        self.Z = 0
        self.YYT = None
        self.V = self.precision * self.Y
        self.VVT_factor = self.V
        self.trYYT = 0.

    def predictive_values(self,mu,var,full_cov,noise_model):
        """
        Predicts the output given the GP

        :param mu: GP's mean
        :param var: GP's variance
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: False|True
        :param noise_model: noise model to use
        :type noise_model: integer
        """
        if full_cov:
            raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
        #_mu = []
        #_var = []
        #_q1 = []
        #_q2 = []
        #for m,v,o in zip(mu,var,output.flatten()):
        #    a,b,c,d = self.noise_model_list[int(o)].predictive_values(m,v)
        #    _mu.append(a)
        #    _var.append(b)
        #    _q1.append(c)
        #    _q2.append(d)
        #return np.vstack(_mu),np.vstack(_var),np.vstack(_q1),np.vstack(_q2)
        return self.noise_model_list[noise_model].predictive_values(mu,var)

    def _get_params(self):
        return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])

    def _get_param_names(self):
        names = []
        for noise_model in self.noise_model_list:
            names += noise_model._get_param_names()
        return names

    def _set_params(self,p):
        cs_params = np.cumsum([0]+self.n_params)
        for i in range(len(self.n_params)):
            self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])

    def _gradients(self,partial):
        #NOTE this is not tested
        return np.hstack([noise_model._gradients(partial) for noise_model in self.noise_model_list])

    def _compute_GP_variables(self):
        #Variables to be called from GP
        mu_tilde = self.v_tilde/self.tau_tilde #When calling EP, this variable is used instead of Y in the GP model
        sigma_sum = 1./self.tau_ + 1./self.tau_tilde
        mu_diff_2 = (self.v_/self.tau_ - mu_tilde)**2
        self.Z = np.sum(np.log(self.Z_hat)) + 0.5*np.sum(np.log(sigma_sum)) + 0.5*np.sum(mu_diff_2/sigma_sum) #Normalization constant, aka Z_ep

        self.Y = mu_tilde[:,None]
        self.YYT = np.dot(self.Y,self.Y.T)
        self.covariance_matrix = np.diag(1./self.tau_tilde)
        self.precision = self.tau_tilde[:,None]
        self.V = self.precision * self.Y
        self.VVT_factor = self.V
        self.trYYT = np.trace(self.YYT)

    def fit_full(self,K):
        """
        The expectation-propagation algorithm.
        For nomenclature see Rasmussen & Williams 2006.
        """
        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        mu = np.zeros(self.N)
        Sigma = K.copy()

        """
        Initial values - Cavity distribution parameters:
        q_(f|mu_,sigma2_) = Product{q_i(f|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        self.tau_ = np.empty(self.N,dtype=float)
        self.v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        self.Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = self.epsilon + 1.
        epsilon_np2 = self.epsilon + 1.
        self.iterations = 0
        self.np1 = [self.tau_tilde.copy()]
        self.np2 = [self.v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                self.tau_[i] = 1./Sigma[i,i] - self.eta*self.tau_tilde[i]
                self.v_[i] = mu[i]/Sigma[i,i] - self.eta*self.v_tilde[i]
                #Marginal moments
                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
                self.tau_tilde[i] += Delta_tau
                self.v_tilde[i] += Delta_v
                #Posterior distribution parameters update
                DSYR(Sigma,Sigma[:,i].copy(), -float(Delta_tau/(1.+ Delta_tau*Sigma[i,i])))
                mu = np.dot(Sigma,self.v_tilde)
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
            B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
            L = jitchol(B)
            V,info = dtrtrs(L,Sroot_tilde_K,lower=1)
            Sigma = K - np.dot(V.T,V)
            mu = np.dot(Sigma,self.v_tilde)
            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
            self.np1.append(self.tau_tilde.copy())
            self.np2.append(self.v_tilde.copy())

        return self._compute_GP_variables()
    def fit_DTC(self, Kmm, Kmn):
        """
        The expectation-propagation algorithm with sparse pseudo-input.
        For nomenclature see ... 2013.
        """
        num_inducing = Kmm.shape[0]

        #TODO: this doesn't work with uncertain inputs!

        """
        Prior approximation parameters:
        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
        Sigma0 = Qnn = Knm*Kmmi*Kmn
        """
        KmnKnm = np.dot(Kmn,Kmn.T)
        Lm = jitchol(Kmm)
        Lmi = chol_inv(Lm)
        Kmmi = np.dot(Lmi.T,Lmi)
        KmmiKmn = np.dot(Kmmi,Kmn)
        Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
        LLT0 = Kmm.copy()

        #Kmmi, Lm, Lmi, Kmm_logdet = pdinv(Kmm)
        #KmnKnm = np.dot(Kmn, Kmn.T)
        #KmmiKmn = np.dot(Kmmi,Kmn)
        #Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
        #LLT0 = Kmm.copy()

        """
        Posterior approximation: q(f|y) = N(f| mu, Sigma)
        Sigma = Diag + P*R.T*R*P.T + K
        mu = w + P*Gamma
        """
        mu = np.zeros(self.N)
        LLT = Kmm.copy()
        Sigma_diag = Qnn_diag.copy()

        """
        Initial values - Cavity distribution parameters:
        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        self.tau_ = np.empty(self.N,dtype=float)
        self.v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        self.Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = 1
        epsilon_np2 = 1
        self.iterations = 0
        np1 = [self.tau_tilde.copy()]
        np2 = [self.v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                #Marginal moments
                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                self.tau_tilde[i] += Delta_tau
                self.v_tilde[i] += Delta_v
                #Posterior distribution parameters update
                DSYR(LLT,Kmn[:,i].copy(),Delta_tau) #LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
                L = jitchol(LLT)
                #cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
                V,info = dtrtrs(L,Kmn,lower=1)
                Sigma_diag = np.sum(V*V,-2)
                si = np.sum(V.T*V[:,i],-1)
                mu += (Delta_v-Delta_tau*mu[i])*si
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            LLT = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
            L = jitchol(LLT)
            V,info = dtrtrs(L,Kmn,lower=1)
            V2,info = dtrtrs(L.T,V,lower=0)
            Sigma_diag = np.sum(V*V,-2)
            Knmv_tilde = np.dot(Kmn,self.v_tilde)
            mu = np.dot(V2.T,Knmv_tilde)
            epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
            epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
            np1.append(self.tau_tilde.copy())
            np2.append(self.v_tilde.copy())

        self._compute_GP_variables()

    def fit_FITC(self, Kmm, Kmn, Knn_diag):
        """
        The expectation-propagation algorithm with sparse pseudo-input.
        For nomenclature see Naish-Guzman and Holden, 2008.
        """
        num_inducing = Kmm.shape[0]

        """
        Prior approximation parameters:
        q(f|X) = int_{df}{N(f|KfuKuu_invu,diag(Kff-Qff)*N(u|0,Kuu)} = N(f|0,Sigma0)
        Sigma0 = diag(Knn-Qnn) + Qnn, Qnn = Knm*Kmmi*Kmn
        """
        Lm = jitchol(Kmm)
        Lmi = chol_inv(Lm)
        Kmmi = np.dot(Lmi.T,Lmi)
        P0 = Kmn.T
        KmnKnm = np.dot(P0.T, P0)
        KmmiKmn = np.dot(Kmmi,P0.T)
        Qnn_diag = np.sum(P0.T*KmmiKmn,-2)
        Diag0 = Knn_diag - Qnn_diag
        R0 = jitchol(Kmmi).T

        """
        Posterior approximation: q(f|y) = N(f| mu, Sigma)
        Sigma = Diag + P*R.T*R*P.T + K
        mu = w + P*Gamma
        """
        self.w = np.zeros(self.N)
        self.Gamma = np.zeros(num_inducing)
        mu = np.zeros(self.N)
        P = P0.copy()
        R = R0.copy()
        Diag = Diag0.copy()
        Sigma_diag = Knn_diag
        RPT0 = np.dot(R0,P0.T)

        """
        Initial values - Cavity distribution parameters:
        q_(g|mu_,sigma2_) = Product{q_i(g|mu_i,sigma2_i)}
        sigma_ = 1./tau_
        mu_ = v_/tau_
        """
        self.tau_ = np.empty(self.N,dtype=float)
        self.v_ = np.empty(self.N,dtype=float)

        #Initial values - Marginal moments
        z = np.empty(self.N,dtype=float)
        self.Z_hat = np.empty(self.N,dtype=float)
        phi = np.empty(self.N,dtype=float)
        mu_hat = np.empty(self.N,dtype=float)
        sigma2_hat = np.empty(self.N,dtype=float)

        #Approximation
        epsilon_np1 = 1
        epsilon_np2 = 1
        self.iterations = 0
        self.np1 = [self.tau_tilde.copy()]
        self.np2 = [self.v_tilde.copy()]
        while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
            update_order = np.random.permutation(self.N)
            for i in update_order:
                #Cavity distribution parameters
                self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
                self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
                #Marginal moments
                self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.noise_model_list[self.index[i]].moments_match(self._transf_data[i],self.tau_[i],self.v_[i])
                #Site parameters update
                Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
                Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
                self.tau_tilde[i] += Delta_tau
                self.v_tilde[i] += Delta_v
                #Posterior distribution parameters update
                dtd1 = Delta_tau*Diag[i] + 1.
                dii = Diag[i]
                Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
                pi_ = P[i,:].reshape(1,num_inducing)
                P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
                Rp_i = np.dot(R,pi_.T)
                RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
                R = jitchol(RTR).T
                self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
                self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
                RPT = np.dot(R,P.T)
                Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
                mu = self.w + np.dot(P,self.Gamma)
                self.iterations += 1
            #Sigma recomputation with Cholesky decomposition
            Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
            Diag = Diag0 * Iplus_Dprod_i
            P = Iplus_Dprod_i[:,None] * P0
            safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
            L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
            R,info = dtrtrs(L,R0,lower=1)
            RPT = np.dot(R,P.T)
            Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
            self.w = Diag * self.v_tilde
            self.Gamma = np.dot(R.T, np.dot(RPT,self.v_tilde))
            mu = self.w + np.dot(P,self.Gamma)
            epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
            epsilon_np2 = sum((self.v_tilde-self.np2[-1])**2)/self.N
            self.np1.append(self.tau_tilde.copy())
            self.np2.append(self.v_tilde.copy())

        return self._compute_GP_variables()
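A hypothetical usage sketch for the class above (assumptions: the devel-branch import paths shown in this diff, and a plain identity matrix standing in for the kernel; untested):

import numpy as np
from GPy.likelihoods.ep_mixed_noise import EP_Mixed_Noise
from GPy.likelihoods import noise_model_constructors

Y_class = np.random.randint(0, 2, (20, 1))          # binary output for a Binomial noise model
Y_count = np.random.poisson(3., (15, 1))            # count output for a Poisson noise model
noise_models = [noise_model_constructors.binomial(),
                noise_model_constructors.poisson()]
lik = EP_Mixed_Noise([Y_class, Y_count], noise_models, epsilon=1e-3)

K = np.eye(lik.N)    # stand-in prior covariance; in practice a kernel matrix on the stacked inputs
lik.fit_full(K)      # runs EP and fills lik.Y, lik.Z, lik.covariance_matrix for the GP code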
@@ -7,9 +7,9 @@ class Gaussian(likelihood):
     """
     Likelihood class for doing Expectation propagation

-    :param Y: observed output (Nx1 numpy.darray)
-    ..Note:: Y values allowed depend on the likelihood_function used
-    :param variance :
+    :param data: observed output
+    :type data: Nx1 numpy.ndarray
+    :param variance: noise parameter
     :param normalize: whether to normalize the data before computing (predictions will be in original scales)
     :type normalize: False|True
     """
GPy/likelihoods/gaussian_mixed_noise.py (new file, 106 lines)
@@ -0,0 +1,106 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs
from likelihood import likelihood
from . import Gaussian


class Gaussian_Mixed_Noise(likelihood):
    """
    Gaussian likelihood for multiple outputs

    This is a wrapper around the likelihood.Gaussian class

    :param data_list: data observations
    :type data_list: list of numpy arrays (num_data_output_i x 1), one array per output
    :param noise_params: noise parameters of each output
    :type noise_params: list of floats, one per output
    :param normalize: whether to normalize the data before computing (predictions will be in original scales)
    :type normalize: False|True
    """
    def __init__(self, data_list, noise_params=None, normalize=True):
        self.Nparams = len(data_list)
        self.n_list = [data.size for data in data_list]
        self.index = np.vstack([np.repeat(i,n)[:,None] for i,n in zip(range(self.Nparams),self.n_list)])

        if noise_params is None:
            noise_params = [1.] * self.Nparams
        else:
            assert self.Nparams == len(noise_params), 'Number of noise parameters does not match the number of noise models.'

        self.noise_model_list = [Gaussian(Y,variance=v,normalize = normalize) for Y,v in zip(data_list,noise_params)]
        self.n_params = [noise_model._get_params().size for noise_model in self.noise_model_list]
        self.data = np.vstack(data_list)
        self.N, self.output_dim = self.data.shape
        self._offset = np.zeros((1, self.output_dim))
        self._scale = np.ones((1, self.output_dim))

        self.is_heteroscedastic = True
        self.Z = 0. # a correction factor which accounts for the approximation made

        self.set_data(data_list)
        self._set_params(np.asarray(noise_params))

    def set_data(self, data_list):
        self.data = np.vstack(data_list)
        self.N, D = self.data.shape
        assert D == self.output_dim
        self.Y = (self.data - self._offset) / self._scale
        if D > self.N:
            raise NotImplementedError
            #self.YYT = np.dot(self.Y, self.Y.T)
            #self.trYYT = np.trace(self.YYT)
            #self.YYT_factor = jitchol(self.YYT)
        else:
            self.YYT = None
            self.trYYT = np.sum(np.square(self.Y))
            self.YYT_factor = self.Y

    def predictive_values(self,mu,var,full_cov,noise_model):
        """
        Predicts the output given the GP

        :param mu: GP's mean
        :param var: GP's variance
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: False|True
        :param noise_model: noise model to use
        :type noise_model: integer
        """
        if full_cov:
            raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
        return self.noise_model_list[noise_model].predictive_values(mu,var,full_cov)

    def _get_params(self):
        return np.hstack([noise_model._get_params().flatten() for noise_model in self.noise_model_list])

    def _get_param_names(self):
        if len(self.noise_model_list) == 1:
            names = self.noise_model_list[0]._get_param_names()
        else:
            names = []
            for noise_model,i in zip(self.noise_model_list,range(len(self.n_list))):
                names.append(''.join(noise_model._get_param_names() + ['_%s' %i]))
        return names

    def _set_params(self,p):
        cs_params = np.cumsum([0]+self.n_params)

        for i in range(len(self.n_params)):
            self.noise_model_list[i]._set_params(p[cs_params[i]:cs_params[i+1]])
        self.precision = np.hstack([np.repeat(noise_model.precision,n) for noise_model,n in zip(self.noise_model_list,self.n_list)])[:,None]

        self.V = self.precision * self.Y
        self.VVT_factor = self.precision * self.YYT_factor
        self.covariance_matrix = np.eye(self.N) * 1./self.precision

    def _gradients(self,partial):
        gradients = []
        aux = np.cumsum([0]+self.n_list)
        for ai,af,noise_model in zip(aux[:-1],aux[1:],self.noise_model_list):
            gradients += [noise_model._gradients(partial[ai:af])]
        return np.hstack(gradients)
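A matching usage sketch for the Gaussian wrapper (again hypothetical and untested; import path assumed from this diff):

import numpy as np
from GPy.likelihoods.gaussian_mixed_noise import Gaussian_Mixed_Noise

Y1 = np.random.randn(30, 1)          # first output, unit noise
Y2 = 3.*np.random.randn(10, 1)       # second, noisier output
lik = Gaussian_Mixed_Noise([Y1, Y2], noise_params=[1., 9.], normalize=False)

print lik._get_param_names()         # one variance parameter per output
print lik.precision.shape            # (40, 1): per-datum precisions, stacked in output order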
(deleted file, 180 lines)
@@ -1,180 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import link_functions

class LikelihoodFunction(object):
    """
    Likelihood class for doing Expectation propagation

    :param Y: observed output (Nx1 numpy.ndarray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
    def __init__(self,link):
        if link == self._analytical:
            self.moments_match = self._moments_match_analytical
        else:
            assert isinstance(link,link_functions.LinkFunction)
            self.link = link
            self.moments_match = self._moments_match_numerical

    def _preprocess_values(self,Y):
        return Y

    def _product(self,gp,obs,mu,sigma):
        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)

    def _log_product_scaled(self,gp,obs,mu,sigma):
        return -.5*(gp-mu)**2/sigma**2 + self._log_distribution_scaled(gp,obs)

    def _log_product_scaled_dgp(self,gp,obs,mu,sigma):
        return -(gp -mu)/sigma**2 + self._log_distribution_scaled_dgp(gp,obs)

    def _locate(self,obs,mu,sigma):
        """
        Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
        """
        lower = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit #FIXME
        upper = np.array([np.log(obs),mu]).max() #Upper limit #FIXME
        #return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
        return sp.optimize.brent(self._nlog_product, args=(obs,mu,sigma), brack=(lower,upper)) #Better to work with _nlog_product than with _product

    def _moments_match_numerical(self,obs,tau,v):
        """
        Simpson's rule is used to calculate the moments numerically; it needs a grid of points as input.
        """
        mu = v/tau
        sigma = np.sqrt(1./tau)
        opt = self._locate(obs,mu,sigma)
        width = 3./np.log(max(obs,2))
        A = opt - width #Grid's lower limit
        B = opt + width #Grid's upper limit
        K = 10*int(np.log(max(obs,150))) #Number of points in the grid
        h = (B-A)/K # length of the intervals
        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
        _aux1 = self._product(A,obs,mu,sigma)
        _aux2 = self._product(B,obs,mu,sigma)
        _aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
        _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
        zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
        first = zeroth*x
        second = first*x
        Z_hat = sum(zeroth)*h/3 # Zero-th moment
        mu_hat = sum(first)*h/(3*Z_hat) # First moment
        m2 = sum(second)*h/(3*Z_hat) # Second moment
        sigma2_hat = m2 - mu_hat**2 # Second central moment
        return float(Z_hat), float(mu_hat), float(sigma2_hat)

class Binomial(LikelihoodFunction):
    """
    Probit likelihood
    Y is expected to take values in {-1,1}
    -----
    $$
    L(x) = \\Phi (Y_i*f_i)
    $$
    """
    def __init__(self,link=None):
        self._analytical = link_functions.Probit
        if not link:
            link = self._analytical
        super(Binomial, self).__init__(link)

    def _distribution(self,gp,obs):
        pass

    def _log_distribution_scaled(self,gp,obs):
        pass

    def _preprocess_values(self,Y):
        """
        Check if the values of the observations correspond to the values
        assumed by the likelihood function.

        ..Note:: Binary classification algorithm works better with classes {-1,1}
        """
        Y_prep = Y.copy()
        Y1 = Y[Y.flatten()==1].size
        Y2 = Y[Y.flatten()==0].size
        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep

    def _moments_match_analytical(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param i: number of observation (int)
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
        Z_hat = std_norm_cdf(z)
        phi = std_norm_pdf(z)
        mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
        sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
        return Z_hat, mu_hat, sigma2_hat

    def predictive_values(self,mu,var):
        """
        Compute mean, variance and confidence interval (percentiles 5 and 95) of the prediction
        :param mu: mean of the latent variable
        :param var: variance of the latent variable
        """
        mu = mu.flatten()
        var = var.flatten()
        mean = stats.norm.cdf(mu/np.sqrt(1+var))
        norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)]
        norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
        p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
        p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
        return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var

class Poisson(LikelihoodFunction):
    """
    Poisson likelihood
    Y is expected to take values in {0,1,2,...}
    -----
    $$
    L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
    $$
    """
    def __init__(self,link=None):
        self._analytical = None
        if not link:
            link = link_functions.Log()
        super(Poisson, self).__init__(link)

    def _distribution(self,gp,obs):
        return stats.poisson.pmf(obs,self.link.inv_transf(gp))

    def _log_distribution_scaled(self,gp,obs):
        """
        Logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
        """
        return -self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)

    def _log_distribution_scaled_dgp(self,gp,obs):
        return -self.link.inv_transf_df(gp) + obs * self.link.log_inv_transf_df(gp)

    def _log_distribution_scaled_d2gp2(self,gp,obs):
        return -self.link.inv_transf_df(gp) + obs * self.link.log_inv_transf_df(gp)


    def predictive_values(self,mu,var):
        """
        Compute mean, and confidence interval (percentiles 5 and 95) of the prediction
        """
        mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
        tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
        p_025 = tmp[:,0]
        p_975 = tmp[:,1]
        return mean,np.nan*mean,p_025,p_975 # better variance here TODO
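The deleted _moments_match_numerical above implemented Simpson's rule on a grid centred near the mode of cavity x likelihood. A compact standalone sketch of the same idea, here for a Poisson factor with an exponential inverse link and a fixed grid (a restatement under those assumptions, not the removed code):

import numpy as np
from scipy import stats

def simpson_moments(obs, tau, v, width=6., K=200):
    # Zeroth/first/second moments of N(f | v/tau, 1/tau) * Poisson(obs | exp(f))
    mu, sigma = v/tau, np.sqrt(1./tau)
    f = np.linspace(mu - width*sigma, mu + width*sigma, K + 1)
    y = stats.norm.pdf(f, mu, sigma) * stats.poisson.pmf(obs, np.exp(f))
    h = f[1] - f[0]
    w = np.ones(K + 1); w[1:-1:2] = 4.; w[2:-1:2] = 2.    # Simpson weights 1,4,2,...,2,4,1
    Z = np.sum(w*y)*h/3.                                  # zeroth moment, Z_hat
    m1 = np.sum(w*y*f)*h/(3.*Z)                           # first moment, mu_hat
    m2 = np.sum(w*y*f**2)*h/(3.*Z)                        # second moment
    return Z, m1, m2 - m1**2                              # sigma2_hat = central second moment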
(deleted file, 62 lines)
@@ -1,62 +0,0 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from ..util.plot import gpplot
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf

class LinkFunction(object):
    """
    Link function class for doing non-Gaussian likelihoods approximation

    :param Y: observed output (Nx1 numpy.ndarray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
    def __init__(self):
        pass

class Probit(LinkFunction):
    """
    Probit link function
    """
    def transf(self,mu):
        pass

    def inv_transf(self,f):
        pass

    def log_inv_transf(self,f):
        pass

class Log(LinkFunction):
    """
    Logarithm link function
    """

    def transf(self,mu):
        return np.log(mu)

    def inv_transf(self,f):
        return np.exp(f)

    def log_inv_transf(self,f):
        return f

    def inv_transf_df(self,f):
        return np.exp(f)

    def log_inv_transf_df(self,f):
        return 1

    def inv_transf_df(self,f):
        return np.exp(f)

    def log_inv_transf_df(self,f):
        return 1
GPy/likelihoods/noise_model_constructors.py (new file, 88 lines)
@@ -0,0 +1,88 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import noise_models

def binomial(gp_link=None):
    """
    Construct a binomial likelihood

    :param gp_link: a GPy gp_link function
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Probit()
    #else:
    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'

    if isinstance(gp_link,noise_models.gp_transformations.Probit):
        analytical_mean = True
        analytical_variance = False

    elif isinstance(gp_link,noise_models.gp_transformations.Step):
        analytical_mean = True
        analytical_variance = True

    else:
        analytical_mean = False
        analytical_variance = False

    return noise_models.binomial_noise.Binomial(gp_link,analytical_mean,analytical_variance)

def exponential(gp_link=None):
    """
    Construct an exponential likelihood

    :param gp_link: a GPy gp_link function
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Identity()

    analytical_mean = False
    analytical_variance = False
    return noise_models.exponential_noise.Exponential(gp_link,analytical_mean,analytical_variance)

def gaussian(gp_link=None,variance=1.):
    """
    Construct a Gaussian likelihood

    :param gp_link: a GPy gp_link function
    :param variance: scalar
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Identity()
    #else:
    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'

    analytical_mean = False
    analytical_variance = False
    return noise_models.gaussian_noise.Gaussian(gp_link,analytical_mean,analytical_variance,variance)

def poisson(gp_link=None):
    """
    Construct a Poisson likelihood

    :param gp_link: a GPy gp_link function
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Log_ex_1()
    #else:
    #    assert isinstance(gp_link,noise_models.gp_transformations.GPTransformation), 'gp_link function is not valid.'
    analytical_mean = False
    analytical_variance = False
    return noise_models.poisson_noise.Poisson(gp_link,analytical_mean,analytical_variance)

def gamma(gp_link=None,beta=1.):
    """
    Construct a Gamma likelihood

    :param gp_link: a GPy gp_link function
    :param beta: scalar
    """
    if gp_link is None:
        gp_link = noise_models.gp_transformations.Log_ex_1()
    analytical_mean = False
    analytical_variance = False
    return noise_models.gamma_noise.Gamma(gp_link,analytical_mean,analytical_variance,beta)
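The constructors above are thin factories that pick the transformation and the analytical/numerical flags; a quick sketch of the intended call pattern (defaults read off the code, untested):

from GPy.likelihoods import noise_model_constructors

bern = noise_model_constructors.binomial()              # Probit transformation, analytical EP moments
pois = noise_model_constructors.poisson()               # Log_ex_1 transformation, numerical moments
gauss = noise_model_constructors.gaussian(variance=0.1) # Identity transformation
gam = noise_model_constructors.gamma(beta=2.)           # Log_ex_1 transformation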
GPy/likelihoods/noise_models/__init__.py (new file, 7 lines)
@@ -0,0 +1,7 @@
import noise_distributions
import binomial_noise
import exponential_noise
import gaussian_noise
import gamma_noise
import poisson_noise
import gp_transformations
GPy/likelihoods/noise_models/binomial_noise.py (new file, 118 lines)
@@ -0,0 +1,118 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution

class Binomial(NoiseDistribution):
    """
    Probit likelihood
    Y is expected to take values in {-1,1}
    -----
    $$
    L(x) = \\Phi (Y_i*f_i)
    $$
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
        super(Binomial, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y):
        """
        Check if the values of the observations correspond to the values
        assumed by the likelihood function.

        ..Note:: Binary classification algorithm works better with classes {-1,1}
        """
        Y_prep = Y.copy()
        Y1 = Y[Y.flatten()==1].size
        Y2 = Y[Y.flatten()==0].size
        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep

    def _moments_match_analytical(self,data_i,tau_i,v_i):
        """
        Moments match of the marginal approximation in EP algorithm

        :param i: number of observation (int)
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        if isinstance(self.gp_link,gp_transformations.Probit):
            z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
            Z_hat = std_norm_cdf(z)
            phi = std_norm_pdf(z)
            mu_hat = v_i/tau_i + data_i*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
            sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)

        elif isinstance(self.gp_link,gp_transformations.Step):
            Z_hat = None
            mu_hat = None
            sigma2_hat = None

        return Z_hat, mu_hat, sigma2_hat

    def _predictive_mean_analytical(self,mu,sigma):
        return stats.norm.cdf(mu/np.sqrt(1+sigma**2))

    def _mass(self,gp,obs):
        #NOTE obs must be in {0,1}
        p = self.gp_link.transf(gp)
        return p**obs * (1.-p)**(1.-obs)

    def _nlog_mass(self,gp,obs):
        # negative log of _mass, sign consistent with the other noise models
        p = self.gp_link.transf(gp)
        return -(obs*np.log(p) + (1.-obs)*np.log(1.-p))

    def _dnlog_mass_dgp(self,gp,obs):
        p = self.gp_link.transf(gp)
        dp = self.gp_link.dtransf_df(gp)
        return (1.-obs)/(1.-p) * dp - obs/p * dp

    def _d2nlog_mass_dgp2(self,gp,obs):
        p = self.gp_link.transf(gp)
        return ((1.-obs)/(1.-p) - obs/p)*self.gp_link.d2transf_df2(gp) + (obs/p**2 + (1.-obs)/(1.-p)**2)*self.gp_link.dtransf_df(gp)**2

    def _mean(self,gp):
        """
        Mean of the likelihood
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the likelihood
        """
        p = self.gp_link.transf(gp)
        return p*(1.-p)

    def _dvariance_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)*(1. - 2.*self.gp_link.transf(gp))

    def _d2variance_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)*(1. - 2.*self.gp_link.transf(gp)) - 2*self.gp_link.dtransf_df(gp)**2

    """
    def predictive_values(self,mu,var): #TODO remove
        mu = mu.flatten()
        var = var.flatten()
        #mean = stats.norm.cdf(mu/np.sqrt(1+var))
        mean = self._predictive_mean_analytical(mu,np.sqrt(var))
        norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)]
        norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
        #p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
        #p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
        p_025 = self._predictive_mean_analytical(norm_025,np.sqrt(var))
        p_975 = self._predictive_mean_analytical(norm_975,np.sqrt(var))
        return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var
    """
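The analytical probit branch above can be cross-checked against brute-force quadrature of N(f | v/tau, 1/tau) * Phi(y*f); a small standalone sketch (not part of the commit):

import numpy as np
from scipy import stats, integrate

y, tau_i, v_i = 1., 2.0, 0.5
pdf = lambda f: stats.norm.pdf(f, v_i/tau_i, np.sqrt(1./tau_i)) * stats.norm.cdf(y*f)
Z_num = integrate.quad(pdf, -10., 10.)[0]
mu_num = integrate.quad(lambda f: f*pdf(f), -10., 10.)[0] / Z_num

z = y*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z)
mu_hat = v_i/tau_i + y*stats.norm.pdf(z)/(Z_hat*np.sqrt(tau_i**2 + tau_i))
print np.allclose([Z_num, mu_num], [Z_hat, mu_hat])    # expect True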
GPy/likelihoods/noise_models/exponential_noise.py (new file, 68 lines)
@@ -0,0 +1,68 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution

class Exponential(NoiseDistribution):
    """
    Exponential likelihood
    Y is expected to take non-negative real values
    -----
    $$
    L(x) = \\frac{1}{\\lambda} \\exp\\left(-\\frac{Y_i}{\\lambda}\\right), \\quad \\lambda = g(f_i)
    $$
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
        super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y):
        return Y

    def _mass(self,gp,obs):
        """
        Mass (or density) function
        """
        return np.exp(-obs/self.gp_link.transf(gp))/self.gp_link.transf(gp)

    def _nlog_mass(self,gp,obs):
        """
        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
        """
        return obs/self.gp_link.transf(gp) + np.log(self.gp_link.transf(gp))

    def _dnlog_mass_dgp(self,gp,obs):
        return ( 1./self.gp_link.transf(gp) - obs/self.gp_link.transf(gp)**2) * self.gp_link.dtransf_df(gp)

    def _d2nlog_mass_dgp2(self,gp,obs):
        fgp = self.gp_link.transf(gp)
        return (2*obs/fgp**3 - 1./fgp**2) * self.gp_link.dtransf_df(gp)**2 + ( 1./fgp - obs/fgp**2) * self.gp_link.d2transf_df2(gp)

    def _mean(self,gp):
        """
        Mean of the likelihood
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the likelihood
        """
        return self.gp_link.transf(gp)**2

    def _dvariance_dgp(self,gp):
        return 2*self.gp_link.transf(gp)*self.gp_link.dtransf_df(gp)

    def _d2variance_dgp2(self,gp):
        return 2 * (self.gp_link.dtransf_df(gp)**2 + self.gp_link.transf(gp)*self.gp_link.d2transf_df2(gp))
71
GPy/likelihoods/noise_models/gamma_noise.py
Normal file
71
GPy/likelihoods/noise_models/gamma_noise.py
Normal file
|
|
@ -0,0 +1,71 @@
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from noise_distributions import NoiseDistribution
|
||||||
|
|
||||||
|
class Gamma(NoiseDistribution):
|
||||||
|
"""
|
||||||
|
Gamma likelihood
|
||||||
|
Y is expected to take values in {0,1,2,...}
|
||||||
|
-----
|
||||||
|
$$
|
||||||
|
L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
|
||||||
|
$$
|
||||||
|
"""
|
||||||
|
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
|
||||||
|
self.beta = beta
|
||||||
|
super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||||
|
|
||||||
|
def _preprocess_values(self,Y):
|
||||||
|
return Y
|
||||||
|
|
||||||
|
def _mass(self,gp,obs):
|
||||||
|
"""
|
||||||
|
Mass (or density) function
|
||||||
|
"""
|
||||||
|
#return stats.gamma.pdf(obs,a = self.gp_link.transf(gp)/self.variance,scale=self.variance)
|
||||||
|
alpha = self.gp_link.transf(gp)*self.beta
|
||||||
|
return obs**(alpha - 1.) * np.exp(-self.beta*obs) * self.beta**alpha / special.gamma(alpha)
|
||||||
|
|
||||||
|
def _nlog_mass(self,gp,obs):
|
||||||
|
"""
|
||||||
|
Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
|
||||||
|
"""
|
||||||
|
alpha = self.gp_link.transf(gp)*self.beta
|
||||||
|
return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
|
||||||
|
|
||||||
|
def _dnlog_mass_dgp(self,gp,obs):
|
||||||
|
return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
|
||||||
|
|
||||||
|
def _d2nlog_mass_dgp2(self,gp,obs):
|
||||||
|
return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
|
||||||
|
|
||||||
|
def _mean(self,gp):
|
||||||
|
"""
|
||||||
|
Mass (or density) function
|
||||||
|
"""
|
||||||
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
|
def _dmean_dgp(self,gp):
|
||||||
|
return self.gp_link.dtransf_df(gp)
|
||||||
|
|
||||||
|
def _d2mean_dgp2(self,gp):
|
||||||
|
return self.gp_link.d2transf_df2(gp)
|
||||||
|
|
||||||
|
def _variance(self,gp):
|
||||||
|
"""
|
||||||
|
Mass (or density) function
|
||||||
|
"""
|
||||||
|
return self.gp_link.transf(gp)/self.beta
|
||||||
|
|
||||||
|
def _dvariance_dgp(self,gp):
|
||||||
|
return self.gp_link.dtransf_df(gp)/self.beta
|
||||||
|
|
||||||
|
def _d2variance_dgp2(self,gp):
|
||||||
|
return self.gp_link.d2transf_df2(gp)/self.beta
|
||||||
98
GPy/likelihoods/noise_models/gaussian_noise.py
Normal file
98
GPy/likelihoods/noise_models/gaussian_noise.py
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
from noise_distributions import NoiseDistribution
|
||||||
|
|
||||||
|
class Gaussian(NoiseDistribution):
|
||||||
|
"""
|
||||||
|
Gaussian likelihood
|
||||||
|
|
||||||
|
:param mean: mean value of the Gaussian distribution
|
||||||
|
:param variance: mean value of the Gaussian distribution
|
||||||
|
"""
|
||||||
|
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,variance=1.):
|
||||||
|
self.variance = variance
|
||||||
|
super(Gaussian, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.array([self.variance])
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
return ['noise_model_variance']
|
||||||
|
|
||||||
|
def _set_params(self,p):
|
||||||
|
self.variance = p
|
||||||
|
|
||||||
|
def _gradients(self,partial):
|
||||||
|
return np.zeros(1)
|
||||||
|
#return np.sum(partial)
|
||||||
|
|
||||||
|
def _preprocess_values(self,Y):
|
||||||
|
"""
|
||||||
|
Check if the values of the observations correspond to the values
|
||||||
|
assumed by the likelihood function.
|
||||||
|
"""
|
||||||
|
return Y
|
||||||
|
|
||||||
|
def _moments_match_analytical(self,data_i,tau_i,v_i):
|
||||||
|
"""
|
||||||
|
Moments match of the marginal approximation in EP algorithm
|
||||||
|
|
||||||
|
:param i: number of observation (int)
|
||||||
|
:param tau_i: precision of the cavity distribution (float)
|
||||||
|
:param v_i: mean/variance of the cavity distribution (float)
|
||||||
|
"""
|
||||||
|
sigma2_hat = 1./(1./self.variance + tau_i)
|
||||||
|
mu_hat = sigma2_hat*(data_i/self.variance + v_i)
|
||||||
|
sum_var = self.variance + 1./tau_i
|
||||||
|
Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
|
||||||
|
return Z_hat, mu_hat, sigma2_hat
|
||||||
|
|
||||||
|
def _predictive_mean_analytical(self,mu,sigma):
|
||||||
|
new_sigma2 = self.predictive_variance(mu,sigma)
|
||||||
|
return new_sigma2*(mu/sigma**2 + self.gp_link.transf(mu)/self.variance)
|
||||||
|
|
||||||
|
def _predictive_variance_analytical(self,mu,sigma,*args): #TODO *args?
|
||||||
|
return 1./(1./self.variance + 1./sigma**2)
|
||||||
|
|
||||||
|
def _mass(self,gp,obs):
|
||||||
|
#return std_norm_pdf( (self.gp_link.transf(gp)-obs)/np.sqrt(self.variance) )
|
||||||
|
return stats.norm.pdf(obs,self.gp_link.transf(gp),np.sqrt(self.variance)) #FIXME
|
||||||
|
|
||||||
|
def _nlog_mass(self,gp,obs):
|
||||||
|
return .5*((self.gp_link.transf(gp)-obs)**2/self.variance + np.log(2.*np.pi*self.variance))
|
||||||
|
|
||||||
|
def _dnlog_mass_dgp(self,gp,obs):
|
||||||
|
return (self.gp_link.transf(gp)-obs)/self.variance * self.gp_link.dtransf_df(gp)
|
||||||
|
|
||||||
|
def _d2nlog_mass_dgp2(self,gp,obs):
|
||||||
|
return ((self.gp_link.transf(gp)-obs)*self.gp_link.d2transf_df2(gp) + self.gp_link.dtransf_df(gp)**2)/self.variance
|
||||||
|
|
||||||
|
def _mean(self,gp):
|
||||||
|
"""
|
||||||
|
Mass (or density) function
|
||||||
|
"""
|
||||||
|
return self.gp_link.transf(gp)
|
||||||
|
|
||||||
|
def _dmean_dgp(self,gp):
|
||||||
|
return self.gp_link.dtransf_df(gp)
|
||||||
|
|
||||||
|
def _d2mean_dgp2(self,gp):
|
||||||
|
return self.gp_link.d2transf_df2(gp)
|
||||||
|
|
||||||
|
def _variance(self,gp):
|
||||||
|
"""
|
||||||
|
Mass (or density) function
|
||||||
|
"""
|
||||||
|
return self.variance
|
||||||
|
|
||||||
|
def _dvariance_dgp(self,gp):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def _d2variance_dgp2(self,gp):
|
||||||
|
return 0
|
||||||
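For the Gaussian case the moment match above is just a product of two Gaussians, so it can be verified in closed form against quadrature; a quick standalone sketch under the identity link (not part of the commit):

import numpy as np
from scipy import stats, integrate

var, tau_i, v_i, y = 0.5, 2.0, 1.0, 0.3
sigma2_hat = 1./(1./var + tau_i)
mu_hat = sigma2_hat*(y/var + v_i)
Z_hat = stats.norm.pdf(y, v_i/tau_i, np.sqrt(var + 1./tau_i))
# Brute force: integrate N(f | v/tau, 1/tau) * N(y | f, var) over f
pdf = lambda f: stats.norm.pdf(f, v_i/tau_i, np.sqrt(1./tau_i)) * stats.norm.pdf(y, f, np.sqrt(var))
Z_num = integrate.quad(pdf, -10., 10.)[0]
mu_num = integrate.quad(lambda f: f*pdf(f), -10., 10.)[0] / Z_num
print np.allclose([Z_hat, mu_hat], [Z_num, mu_num])    # expect True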
GPy/likelihoods/noise_models/gp_transformations.py (new file, 125 lines)
@@ -0,0 +1,125 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats
import scipy as sp
import pylab as pb
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf

class GPTransformation(object):
    """
    Link function class for doing non-Gaussian likelihoods approximation

    :param Y: observed output (Nx1 numpy.ndarray)
    ..Note:: Y values allowed depend on the likelihood_function used
    """
    def __init__(self):
        pass

class Identity(GPTransformation):
    """
    $$
    g(f) = f
    $$
    """
    #def transf(self,mu):
    #    return mu

    def transf(self,f):
        return f

    def dtransf_df(self,f):
        return 1.

    def d2transf_df2(self,f):
        return 0


class Probit(GPTransformation):
    """
    $$
    g(f) = \\Phi(f)
    $$
    """
    #def transf(self,mu):
    #    return inv_std_norm_cdf(mu)

    def transf(self,f):
        return std_norm_cdf(f)

    def dtransf_df(self,f):
        return std_norm_pdf(f)

    def d2transf_df2(self,f):
        return -f * std_norm_pdf(f)

class Log(GPTransformation):
    """
    $$
    g(f) = \\exp(f)
    $$
    """
    #def transf(self,mu):
    #    return np.log(mu)

    def transf(self,f):
        return np.exp(f)

    def dtransf_df(self,f):
        return np.exp(f)

    def d2transf_df2(self,f):
        return np.exp(f)

class Log_ex_1(GPTransformation):
    """
    $$
    g(f) = \\log(1 + \\exp(f))
    $$
    """
    #def transf(self,mu):
    #    """
    #    function: output space -> latent space
    #    """
    #    return np.log(np.exp(mu) - 1)

    def transf(self,f):
        """
        function: latent space -> output space
        """
        return np.log(1.+np.exp(f))

    def dtransf_df(self,f):
        return np.exp(f)/(1.+np.exp(f))

    def d2transf_df2(self,f):
        aux = np.exp(f)/(1.+np.exp(f))
        return aux*(1.-aux)

class Reciprocal(GPTransformation):
    def transf(self,f):
        return 1./f

    def dtransf_df(self,f):
        return -1./f**2

    def d2transf_df2(self,f):
        return 2./f**3

class Step(GPTransformation):
    """
    $$
    g(f) = I_{x \\in A}
    $$
    """
    def transf(self,f):
        #transformation goes here
        return np.where(f>0, 1, -1)

    def dtransf_df(self,f):
        pass

    def d2transf_df2(self,f):
        pass
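A finite-difference sanity check for the Log_ex_1 (softplus) derivatives above; a self-contained sketch that restates the three functions rather than importing the module:

import numpy as np

transf = lambda f: np.log(1. + np.exp(f))
dtransf = lambda f: np.exp(f)/(1. + np.exp(f))
d2transf = lambda f: dtransf(f)*(1. - dtransf(f))

f, eps = 0.7, 1e-6
num_d1 = (transf(f + eps) - transf(f - eps))/(2.*eps)
num_d2 = (dtransf(f + eps) - dtransf(f - eps))/(2.*eps)
print np.allclose([num_d1, num_d2], [dtransf(f), d2transf(f)])    # expect True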
GPy/likelihoods/noise_models/noise_distributions.py (new file, 384 lines)
@@ -0,0 +1,384 @@
||||||
|
# Copyright (c) 2012, 2013 Ricardo Andrade
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from scipy import stats,special
|
||||||
|
import scipy as sp
|
||||||
|
import pylab as pb
|
||||||
|
from GPy.util.plot import gpplot
|
||||||
|
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
|
||||||
|
import gp_transformations
|
||||||
|
|
||||||
|
|
||||||
|
class NoiseDistribution(object):
|
||||||
|
"""
|
||||||
|
Likelihood class for doing Expectation propagation
|
||||||
|
|
||||||
|
:param Y: observed output (Nx1 numpy.darray)
|
||||||
|
..Note:: Y values allowed depend on the LikelihoodFunction used
|
||||||
|
"""
|
||||||
|
def __init__(self,gp_link,analytical_mean=False,analytical_variance=False):
|
||||||
|
#assert isinstance(gp_link,gp_transformations.GPTransformation), "gp_link is not a valid GPTransformation."#FIXME
|
||||||
|
self.gp_link = gp_link
|
||||||
|
self.analytical_mean = analytical_mean
|
||||||
|
self.analytical_variance = analytical_variance
|
||||||
|
if self.analytical_mean:
|
||||||
|
self.moments_match = self._moments_match_analytical
|
||||||
|
self.predictive_mean = self._predictive_mean_analytical
|
||||||
|
else:
|
||||||
|
self.moments_match = self._moments_match_numerical
|
||||||
|
self.predictive_mean = self._predictive_mean_numerical
|
||||||
|
if self.analytical_variance:
|
||||||
|
self.predictive_variance = self._predictive_variance_analytical
|
||||||
|
else:
|
||||||
|
self.predictive_variance = self._predictive_variance_numerical
|
||||||
|
|
||||||
|
def _get_params(self):
|
||||||
|
return np.zeros(0)
|
||||||
|
|
||||||
|
def _get_param_names(self):
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _set_params(self,p):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _gradients(self,partial):
|
||||||
|
return np.zeros(0)
|
    def _preprocess_values(self,Y):
        """
        In case it is needed, this function assesses the output values or applies any pertinent transformation to them.

        :param Y: observed output (Nx1 numpy.ndarray)
        """
        return Y

    def _product(self,gp,obs,mu,sigma):
        """
        Product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._mass(gp,obs)

    def _nlog_product_scaled(self,gp,obs,mu,sigma):
        """
        Negative log-product between the cavity distribution and a likelihood factor.
        .. Note:: the constant term in the Gaussian distribution is ignored.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return .5*((gp-mu)/sigma)**2 + self._nlog_mass(gp,obs)

    def _dnlog_product_dgp(self,gp,obs,mu,sigma):
        """
        Derivative wrt the latent variable of the negative log-product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 + self._dnlog_mass_dgp(gp,obs)

    def _d2nlog_product_dgp2(self,gp,obs,mu,sigma):
        """
        Second derivative wrt the latent variable of the negative log-product between the cavity distribution and a likelihood factor.

        :param gp: latent variable
        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 + self._d2nlog_mass_dgp2(gp,obs)

    def _product_mode(self,obs,mu,sigma):
        """
        Newton-CG search for the mode of _product (cavity x likelihood factor).

        :param obs: observed output
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return sp.optimize.fmin_ncg(self._nlog_product_scaled,x0=mu,fprime=self._dnlog_product_dgp,fhess=self._d2nlog_product_dgp2,args=(obs,mu,sigma))

    def _moments_match_analytical(self,obs,tau,v):
        """
        If available, this function computes the moments analytically.
        """
        pass
    def _moments_match_numerical(self,obs,tau,v):
        """
        Laplace approximation to calculate the moments.

        :param obs: observed output
        :param tau: cavity distribution 1st natural parameter (precision)
        :param v: cavity distribution 2nd natural parameter (mu*precision)
        """
        mu = v/tau
        mu_hat = self._product_mode(obs,mu,np.sqrt(1./tau))
        sigma2_hat = 1./(tau + self._d2nlog_mass_dgp2(mu_hat,obs))
        Z_hat = np.exp(-.5*tau*(mu_hat-mu)**2) * self._mass(mu_hat,obs)*np.sqrt(tau*sigma2_hat)
        return Z_hat,mu_hat,sigma2_hat
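As a sanity check of this moment-matching recipe, here is a standalone sketch on a toy case where the answer is known in closed form: a Gaussian likelihood factor N(obs|f,s2) against a Gaussian cavity, for which the mode found by Newton-CG must equal the exact posterior mean. All names here are illustrative, not GPy API.

import numpy as np
from scipy import optimize

obs, s2 = 1.2, 0.5                      # toy Gaussian likelihood factor N(obs | f, s2)
tau, v = 2.0, 0.8                       # cavity natural parameters
mu, sigma = v / tau, np.sqrt(1. / tau)

nlog_product = lambda f: float(.5 * ((f - mu) / sigma) ** 2 + .5 * (f - obs) ** 2 / s2)
dnlog = lambda f: (f - mu) / sigma ** 2 + (f - obs) / s2
d2nlog = lambda f: np.array([[1. / sigma ** 2 + 1. / s2]])

mu_hat = optimize.fmin_ncg(nlog_product, x0=mu, fprime=dnlog, fhess=d2nlog, disp=False)
sigma2_hat = 1. / (tau + 1. / s2)
# Closed form for a Gaussian-Gaussian product:
assert np.allclose(mu_hat, (mu / sigma ** 2 + obs / s2) * sigma2_hat)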
    def _nlog_conditional_mean_scaled(self,gp,mu,sigma):
        """
        Negative logarithm of the latent variable's predictive distribution times the output's mean given the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. Note:: this function helps computing E(Y_star) = E(E(Y_star|f_star))
        """
        return .5*((gp - mu)/sigma)**2 - np.log(self._mean(gp))

    def _dnlog_conditional_mean_dgp(self,gp,mu,sigma):
        """
        Derivative of _nlog_conditional_mean_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - self._dmean_dgp(gp)/self._mean(gp)

    def _d2nlog_conditional_mean_dgp2(self,gp,mu,sigma):
        """
        Second derivative of _nlog_conditional_mean_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - self._d2mean_dgp2(gp)/self._mean(gp) + (self._dmean_dgp(gp)/self._mean(gp))**2

    def _nlog_exp_conditional_variance_scaled(self,gp,mu,sigma):
        """
        Negative logarithm of the latent variable's predictive distribution times the output's variance given the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. Note:: this function helps computing E(V(Y_star|f_star))
        """
        return .5*((gp - mu)/sigma)**2 - np.log(self._variance(gp))

    def _dnlog_exp_conditional_variance_dgp(self,gp,mu,sigma):
        """
        Derivative of _nlog_exp_conditional_variance_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - self._dvariance_dgp(gp)/self._variance(gp)

    def _d2nlog_exp_conditional_variance_dgp2(self,gp,mu,sigma):
        """
        Second derivative of _nlog_exp_conditional_variance_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - self._d2variance_dgp2(gp)/self._variance(gp) + (self._dvariance_dgp(gp)/self._variance(gp))**2

    def _nlog_exp_conditional_mean_sq_scaled(self,gp,mu,sigma):
        """
        Negative logarithm of the latent variable's predictive distribution times the output's squared mean given the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation

        .. Note:: this function helps computing E( E(Y_star|f_star)**2 )
        """
        return .5*((gp - mu)/sigma)**2 - 2*np.log(self._mean(gp))

    def _dnlog_exp_conditional_mean_sq_dgp(self,gp,mu,sigma):
        """
        Derivative of _nlog_exp_conditional_mean_sq_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return (gp - mu)/sigma**2 - 2*self._dmean_dgp(gp)/self._mean(gp)

    def _d2nlog_exp_conditional_mean_sq_dgp2(self,gp,mu,sigma):
        """
        Second derivative of _nlog_exp_conditional_mean_sq_scaled wrt the latent variable.

        :param gp: latent variable
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        return 1./sigma**2 - 2*( self._d2mean_dgp2(gp)/self._mean(gp) - (self._dmean_dgp(gp)/self._mean(gp))**2 )
    def _predictive_mean_analytical(self,mu,sigma):
        """
        If available, this function computes the predictive mean analytically.
        """
        pass

    def _predictive_variance_analytical(self,mu,sigma):
        """
        If available, this function computes the predictive variance analytically.
        """
        pass

    def _predictive_mean_numerical(self,mu,sigma):
        """
        Laplace approximation to the predictive mean: E(Y_star) = E( E(Y_star|f_star) )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        maximum = sp.optimize.fmin_ncg(self._nlog_conditional_mean_scaled,x0=self._mean(mu),fprime=self._dnlog_conditional_mean_dgp,fhess=self._d2nlog_conditional_mean_dgp2,args=(mu,sigma))
        mean = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma))*sigma)
        """
        pb.figure()
        x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
        f = np.array([np.exp(-self._nlog_conditional_mean_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
        pb.plot(x,f,'b-')
        sigma2 = 1./self._d2nlog_conditional_mean_dgp2(maximum,mu,sigma)
        f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
        k = np.exp(-self._nlog_conditional_mean_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
        pb.plot(x,f2*mean,'r-')
        pb.vlines(maximum,0,f.max())
        """
        return mean
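An independent check of this Laplace estimate: the same expectation E(Y_star) = int E(Y_star|f) N(f|mu,sigma^2) df can be computed by Gauss-Hermite quadrature. A minimal sketch, assuming a softplus mean function purely for illustration:

import numpy as np

mu, sigma = 0.4, 0.8                          # moments of the latent f_star
mean_fn = lambda f: np.log(1. + np.exp(f))    # E(Y_star|f_star), softplus for illustration

x, w = np.polynomial.hermite.hermgauss(50)    # nodes/weights for int exp(-x^2) h(x) dx
e_y = np.sum(w * mean_fn(mu + np.sqrt(2.) * sigma * x)) / np.sqrt(np.pi)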
    def _predictive_mean_sq(self,mu,sigma):
        """
        Laplace approximation to the predictive mean squared: E(Y_star**2) = E( E(Y_star|f_star)**2 )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_mean_sq_scaled,x0=self._mean(mu),fprime=self._dnlog_exp_conditional_mean_sq_dgp,fhess=self._d2nlog_exp_conditional_mean_sq_dgp2,args=(mu,sigma))
        mean_squared = np.exp(-self._nlog_exp_conditional_mean_sq_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_mean_sq_dgp2(maximum,mu,sigma))*sigma)
        return mean_squared

    def _predictive_variance_numerical(self,mu,sigma,predictive_mean=None):
        """
        Laplace approximation to the predictive variance: V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )

        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        :param predictive_mean: output's predictive mean; if None, the _predictive_mean function will be called.
        """
        # E( V(Y_star|f_star) )
        maximum = sp.optimize.fmin_ncg(self._nlog_exp_conditional_variance_scaled,x0=self._variance(mu),fprime=self._dnlog_exp_conditional_variance_dgp,fhess=self._d2nlog_exp_conditional_variance_dgp2,args=(mu,sigma))
        exp_var = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))/(np.sqrt(self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma))*sigma)

        """
        pb.figure()
        x = np.array([mu + step*sigma for step in np.linspace(-7,7,100)])
        f = np.array([np.exp(-self._nlog_exp_conditional_variance_scaled(xi,mu,sigma))/np.sqrt(2*np.pi*sigma**2) for xi in x])
        pb.plot(x,f,'b-')
        sigma2 = 1./self._d2nlog_exp_conditional_variance_dgp2(maximum,mu,sigma)
        f2 = np.exp(-.5*(x-maximum)**2/sigma2)/np.sqrt(2*np.pi*sigma2)
        k = np.exp(-self._nlog_exp_conditional_variance_scaled(maximum,mu,sigma))*np.sqrt(sigma2)/np.sqrt(sigma**2)
        pb.plot(x,f2*exp_var,'r--')
        pb.vlines(maximum,0,f.max())
        """

        # V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
        exp_exp2 = self._predictive_mean_sq(mu,sigma)
        if predictive_mean is None:
            predictive_mean = self.predictive_mean(mu,sigma)
        var_exp = exp_exp2 - predictive_mean**2
        return exp_var + var_exp
    def _predictive_percentiles(self,p,mu,sigma):
        """
        Percentiles of the predictive distribution.

        :param p: lower tail probability
        :param mu: cavity distribution mean
        :param sigma: cavity distribution standard deviation
        """
        qf = stats.norm.ppf(p,mu,sigma)
        return self.gp_link.transf(qf)
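This works because the method maps latent quantiles through the monotonic link: q_p = g(Phi^{-1}(p; mu, sigma^2)). A minimal sketch with a softplus link standing in for gp_link.transf:

import numpy as np
from scipy import stats

mu, sigma = 0.4, 0.8
softplus = lambda f: np.log(1. + np.exp(f))          # stand-in for gp_link.transf

q_low  = softplus(stats.norm.ppf(.025, mu, sigma))   # 2.5th percentile of the output
q_high = softplus(stats.norm.ppf(.975, mu, sigma))   # 97.5th percentile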
    def _nlog_joint_predictive_scaled(self,x,mu,sigma):
        """
        Negative logarithm of the joint predictive distribution (latent variable and output).

        :param x: tuple (latent variable, output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        """
        return self._nlog_product_scaled(x[0],x[1],mu,sigma)

    def _gradient_nlog_joint_predictive(self,x,mu,sigma):
        """
        Gradient of _nlog_joint_predictive_scaled.

        :param x: tuple (latent variable, output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        .. Note:: only available when the output is continuous.
        """
        assert not self.discrete, "Gradient not available for discrete outputs."
        return np.array((self._dnlog_product_dgp(gp=x[0],obs=x[1],mu=mu,sigma=sigma),self._dnlog_mass_dobs(obs=x[1],gp=x[0])))

    def _hessian_nlog_joint_predictive(self,x,mu,sigma):
        """
        Hessian of _nlog_joint_predictive_scaled.

        :param x: tuple (latent variable, output)
        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        .. Note:: only available when the output is continuous.
        """
        assert not self.discrete, "Hessian not available for discrete outputs."
        cross_derivative = self._d2nlog_mass_dcross(gp=x[0],obs=x[1])
        return np.array((self._d2nlog_product_dgp2(gp=x[0],obs=x[1],mu=mu,sigma=sigma),cross_derivative,cross_derivative,self._d2nlog_mass_dobs2(obs=x[1],gp=x[0]))).reshape(2,2)
    def _joint_predictive_mode(self,mu,sigma):
        """
        Mode of the joint predictive distribution (latent variable and output), found by Newton-CG.

        :param mu: latent variable's predictive mean
        :param sigma: latent variable's predictive standard deviation
        """
        return sp.optimize.fmin_ncg(self._nlog_joint_predictive_scaled,x0=(mu,self.gp_link.transf(mu)),fprime=self._gradient_nlog_joint_predictive,fhess=self._hessian_nlog_joint_predictive,args=(mu,sigma))
    def predictive_values(self,mu,var):
        """
        Compute the mean, variance and confidence interval (percentiles 2.5 and 97.5) of the prediction.

        :param mu: mean of the latent variable
        :param var: variance of the latent variable
        """
        if isinstance(mu,float) or isinstance(mu,int):
            mu = [mu]
            var = [var]
        pred_mean = []
        pred_var = []
        q1 = []
        q3 = []
        for m,s in zip(mu,np.sqrt(var)):
            pred_mean.append(self.predictive_mean(m,s))
            pred_var.append(self.predictive_variance(m,s,pred_mean[-1]))
            q1.append(self._predictive_percentiles(.025,m,s))
            q3.append(self._predictive_percentiles(.975,m,s))
        pred_mean = np.vstack(pred_mean)
        pred_var = np.vstack(pred_var)
        q1 = np.vstack(q1)
        q3 = np.vstack(q3)
        return pred_mean, pred_var, q1, q3
85 GPy/likelihoods/noise_models/poisson_noise.py Normal file
@@ -0,0 +1,85 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
import gp_transformations
from noise_distributions import NoiseDistribution

class Poisson(NoiseDistribution):
    """
    Poisson likelihood

    Y is expected to take values in {0,1,2,...}
    -----
    $$
    L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
    $$
    """
    def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
        #self.discrete = True
        #self.support_limits = (0,np.inf)

        #self.analytical_mean = False
        super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)

    def _preprocess_values(self,Y): #TODO
        #self.scale = .5*Y.max()
        #self.shift = Y.mean()
        return Y #(Y - self.shift)/self.scale

    def _mass(self,gp,obs):
        """
        Mass (or density) function
        """
        #obs = obs*self.scale + self.shift
        return stats.poisson.pmf(obs,self.gp_link.transf(gp))

    def _nlog_mass(self,gp,obs):
        """
        Negative logarithm of the un-normalized distribution: factors that are not a function of gp are omitted
        """
        return self.gp_link.transf(gp) - obs * np.log(self.gp_link.transf(gp)) + np.log(special.gamma(obs+1))

    def _dnlog_mass_dgp(self,gp,obs):
        return self.gp_link.dtransf_df(gp) * (1. - obs/self.gp_link.transf(gp))

    def _d2nlog_mass_dgp2(self,gp,obs):
        d2_df = self.gp_link.d2transf_df2(gp)
        transf = self.gp_link.transf(gp)
        return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df

    def _dnlog_mass_dobs(self,obs,gp): #TODO not needed
        return special.psi(obs+1) - np.log(self.gp_link.transf(gp))

    def _d2nlog_mass_dobs2(self,obs,gp=None): #TODO not needed
        return special.polygamma(1,obs)

    def _d2nlog_mass_dcross(self,obs,gp): #TODO not needed
        return -self.gp_link.dtransf_df(gp)/self.gp_link.transf(gp)

    def _mean(self,gp):
        """
        Mean of the likelihood given the latent variable: E(Y|gp) = lambda = g(gp)
        """
        return self.gp_link.transf(gp)

    def _dmean_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2mean_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)

    def _variance(self,gp):
        """
        Variance of the likelihood given the latent variable: V(Y|gp) = lambda = g(gp)
        """
        return self.gp_link.transf(gp)

    def _dvariance_dgp(self,gp):
        return self.gp_link.dtransf_df(gp)

    def _d2variance_dgp2(self,gp):
        return self.gp_link.d2transf_df2(gp)
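The analytic derivatives above are easy to verify numerically. A standalone sketch (exp link, plain numpy/scipy, mirroring _nlog_mass and _dnlog_mass_dgp rather than importing them):

import numpy as np
from scipy import special

transf = np.exp                      # exp link: lambda = exp(f)
dtransf_df = np.exp

def nlog_mass(f, y):                 # -log Poisson(y | exp(f)), as in _nlog_mass
    lam = transf(f)
    return lam - y * np.log(lam) + np.log(special.gamma(y + 1))

def dnlog_mass_df(f, y):             # as in _dnlog_mass_dgp
    return dtransf_df(f) * (1. - y / transf(f))

f, y, eps = 0.7, 3, 1e-6
fd = (nlog_mass(f + eps, y) - nlog_mass(f - eps, y)) / (2 * eps)
assert abs(fd - dnlog_mass_df(f, y)) < 1e-6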
@@ -14,3 +14,5 @@ from warped_gp import WarpedGP
 from bayesian_gplvm import BayesianGPLVM
 from mrd import MRD
 from gradient_checker import GradientChecker
+from gp_multioutput_regression import GPMultioutputRegression
+from sparse_gp_multioutput_regression import SparseGPMultioutputRegression
@@ -31,8 +31,8 @@ class FITCClassification(FITC):
             kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
 
         if likelihood is None:
-            distribution = likelihoods.likelihood_functions.Binomial()
-            likelihood = likelihoods.EP(Y, distribution)
+            noise_model = likelihoods.binomial()
+            likelihood = likelihoods.EP(Y, noise_model)
         elif Y is not None:
             if not all(Y.flatten() == likelihood.data.flatten()):
                 raise Warning, 'likelihood.data and Y are different.'
@@ -31,8 +31,8 @@ class GPClassification(GP):
             kernel = kern.rbf(X.shape[1])
 
         if likelihood is None:
-            distribution = likelihoods.likelihood_functions.Binomial()
-            likelihood = likelihoods.EP(Y, distribution)
+            noise_model = likelihoods.binomial()
+            likelihood = likelihoods.EP(Y, noise_model)
         elif Y is not None:
             if not all(Y.flatten() == likelihood.data.flatten()):
                 raise Warning, 'likelihood.data and Y are different.'
59 GPy/models/gp_multioutput_regression.py Normal file
@@ -0,0 +1,59 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
from ..util import multioutput

class GPMultioutputRegression(GP):
    """
    Multiple-output Gaussian process with Gaussian noise.

    This is a wrapper around the models.GP class, with a set of sensible defaults.

    :param X_list: input observations
    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
    :param Y_list: observed values
    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
    :param kernel_list: GPy kernels, defaults to rbf
    :type kernel_list: list of GPy kernels
    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
    :type noise_variance_list: list of floats
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
    :type normalize_Y: False|True
    :param W_columns: number of columns of the coregionalization parameter 'coregion_W' (see coregionalise kernel documentation)
    :type W_columns: integer
    """

    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,W_columns=1):

        self.num_outputs = len(Y_list)
        assert len(X_list) == self.num_outputs, 'Number of outputs does not match length of inputs list.'

        #Inputs indexing
        i = 0
        index = []
        for x,y in zip(X_list,Y_list):
            assert x.shape[0] == y.shape[0]
            index.append(np.repeat(i,x.shape[0])[:,None])
            i += 1
        index = np.vstack(index)
        X = np.hstack([np.vstack(X_list),index])
        original_dim = X.shape[1] - 1

        #Mixed noise likelihood definition
        likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)

        #Coregionalization kernel definition
        if kernel_list is None:
            kernel_list = [[kern.rbf(original_dim)],[]]
        mkernel = multioutput.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, CK = kernel_list[0], NC = kernel_list[1], W_columns=W_columns)

        self.multioutput = True
        GP.__init__(self, X, likelihood, mkernel, normalize_X=normalize_X)
        self.ensure_default_constraints()
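A minimal usage sketch for this wrapper, assuming the class is exposed as GPy.models.GPMultioutputRegression as registered in models/__init__.py above; two toy outputs with different sample sizes:

import numpy as np
import GPy

X1 = np.random.rand(20, 1); Y1 = np.sin(X1) + 0.05 * np.random.randn(20, 1)
X2 = np.random.rand(15, 1); Y2 = np.cos(X2) + 0.05 * np.random.randn(15, 1)

# Two coupled outputs sharing one coregionalized rbf kernel (the default)
m = GPy.models.GPMultioutputRegression([X1, X2], [Y1, Y2], W_columns=1)
m.optimize()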
@@ -28,11 +28,11 @@ class SparseGPClassification(SparseGP):
 
     def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
         if kernel is None:
-            kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3)
+            kernel = kern.rbf(X.shape[1])# + kern.white(X.shape[1],1e-3)
 
         if likelihood is None:
-            distribution = likelihoods.likelihood_functions.Binomial()
-            likelihood = likelihoods.EP(Y, distribution)
+            noise_model = likelihoods.binomial()
+            likelihood = likelihoods.EP(Y, noise_model)
         elif Y is not None:
             if not all(Y.flatten() == likelihood.data.flatten()):
                 raise Warning, 'likelihood.data and Y are different.'
80 GPy/models/sparse_gp_multioutput_regression.py Normal file
@@ -0,0 +1,80 @@
# Copyright (c) 2013, Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from ..core import SparseGP
from .. import likelihoods
from .. import kern
from ..util import multioutput

class SparseGPMultioutputRegression(SparseGP):
    """
    Sparse multiple-output Gaussian process with Gaussian noise.

    This is a wrapper around the models.SparseGP class, with a set of sensible defaults.

    :param X_list: input observations
    :type X_list: list of numpy arrays (num_data_output_i x input_dim), one array per output
    :param Y_list: observed values
    :type Y_list: list of numpy arrays (num_data_output_i x 1), one array per output
    :param kernel_list: GPy kernels, defaults to rbf
    :type kernel_list: list of GPy kernels
    :param noise_variance_list: noise parameters per output, defaults to 1.0 for every output
    :type noise_variance_list: list of floats
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
    :type normalize_Y: False|True
    :param Z_list: inducing inputs (optional)
    :type Z_list: list of numpy arrays (num_inducing_output_i x input_dim), one array per output | empty list
    :param num_inducing: number of inducing inputs per output, defaults to 10 (ignored if Z_list is not empty)
    :type num_inducing: integer
    :param W_columns: number of columns of the coregionalization parameter 'coregion_W' (see coregionalise kernel documentation)
    :type W_columns: integer
    """
    #NOTE not tested with uncertain inputs
    def __init__(self,X_list,Y_list,kernel_list=None,noise_variance_list=None,normalize_X=False,normalize_Y=False,Z_list=[],num_inducing=10,W_columns=1):

        self.num_outputs = len(Y_list)
        assert len(X_list) == self.num_outputs, 'Number of outputs does not match length of inputs list.'

        #Inducing inputs list
        if len(Z_list):
            assert len(Z_list) == self.num_outputs, 'Number of outputs does not match length of inducing inputs list.'
        else:
            if isinstance(num_inducing,np.int):
                num_inducing = [num_inducing] * self.num_outputs
            num_inducing = np.asarray(num_inducing)
            assert num_inducing.size == self.num_outputs, 'Number of outputs does not match length of inducing inputs list.'
            for ni,X in zip(num_inducing,X_list):
                i = np.random.permutation(X.shape[0])[:ni]
                Z_list.append(X[i].copy())

        #Inputs and inducing inputs indexing
        i = 0
        index = []
        index_z = []
        for x,y,z in zip(X_list,Y_list,Z_list):
            assert x.shape[0] == y.shape[0]
            index.append(np.repeat(i,x.shape[0])[:,None])
            index_z.append(np.repeat(i,z.shape[0])[:,None])
            i += 1
        index = np.vstack(index)
        index_z = np.vstack(index_z)
        X = np.hstack([np.vstack(X_list),index])
        Z = np.hstack([np.vstack(Z_list),index_z])
        original_dim = X.shape[1] - 1

        #Mixed noise likelihood definition
        likelihood = likelihoods.Gaussian_Mixed_Noise(Y_list,noise_params=noise_variance_list,normalize=normalize_Y)

        #Coregionalization kernel definition
        if kernel_list is None:
            kernel_list = [[kern.rbf(original_dim)],[]]
        mkernel = multioutput.build_lcm(input_dim=original_dim, num_outputs=self.num_outputs, CK = kernel_list[0], NC = kernel_list[1], W_columns=W_columns)

        self.multioutput = True
        SparseGP.__init__(self, X, likelihood, mkernel, Z=Z, normalize_X=normalize_X)
        self.constrain_fixed('.*iip_\d+_1')
        self.ensure_default_constraints()
@@ -20,7 +20,11 @@ class SparseGPRegression(SparseGP):
     :type normalize_X: False|True
     :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
     :type normalize_Y: False|True
+    :param Z: inducing inputs (optional, see note)
+    :type Z: np.ndarray (num_inducing x input_dim) | None
     :rtype: model object
+    :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
+    :type X_variance: np.ndarray (num_data x input_dim) | None
 
     .. Note:: Multiple independent outputs are allowed using columns of Y
@@ -199,10 +199,7 @@ class GradientTests(unittest.TestCase):
         X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
         Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
         kernel = GPy.kern.rbf(1)
-        distribution = GPy.likelihoods.likelihood_functions.Binomial()
-        likelihood = GPy.likelihoods.EP(Y, distribution)
-        m = GPy.core.GP(X, likelihood, kernel)
-        m.ensure_default_constraints()
+        m = GPy.models.GPClassification(X,Y,kernel=kernel)
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
@@ -212,10 +209,11 @@ class GradientTests(unittest.TestCase):
         Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
         Z = np.linspace(0, 15, 4)[:, None]
         kernel = GPy.kern.rbf(1)
-        distribution = GPy.likelihoods.likelihood_functions.Binomial()
-        likelihood = GPy.likelihoods.EP(Y, distribution)
-        m = GPy.core.SparseGP(X, likelihood, kernel, Z)
-        m.ensure_default_constraints()
+        m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
+        #distribution = GPy.likelihoods.likelihood_functions.Binomial()
+        #likelihood = GPy.likelihoods.EP(Y, distribution)
+        #m = GPy.core.SparseGP(X, likelihood, kernel, Z)
+        #m.ensure_default_constraints()
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
@@ -224,7 +222,7 @@ class GradientTests(unittest.TestCase):
         X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
         k = GPy.kern.rbf(1) + GPy.kern.white(1)
         Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
-        m = GPy.models.FITCClassification(X, Y=Y)
+        m = GPy.models.FITCClassification(X, Y, kernel = k)
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
@@ -14,3 +14,4 @@ import visualize
 import decorators
 import classification
 import latent_space_visualizations
+import multioutput
35 GPy/util/multioutput.py Normal file
@@ -0,0 +1,35 @@
import numpy as np
import warnings
from .. import kern

def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None):
    #TODO build_icm or build_lcm
    """
    Builds a kernel for a linear coregionalization model.

    :param input_dim: input dimensionality
    :param num_outputs: number of outputs
    :param CK: list of coregionalized kernels (i.e., each one will be multiplied by a coregionalise kernel)
    :param NC: list of kernels that will be added up together with CK, but won't be multiplied by a coregionalise kernel
    :param W_columns: number of columns of the coregionalization parameter 'coregion_W'
    :type W_columns: integer
    """

    for k in CK:
        if k.input_dim != input_dim:
            k.input_dim = input_dim
            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

    for k in NC:
        if k.input_dim != input_dim + 1:
            k.input_dim = input_dim + 1
            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")

    kernel = CK[0].prod(kern.coregionalise(num_outputs,W_columns,W,kappa),tensor=True)
    for k in CK[1:]:
        k_coreg = kern.coregionalise(num_outputs,W_columns,W,kappa)
        kernel += k.prod(k_coreg,tensor=True)
    for k in NC:
        kernel += k

    return kernel
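A sketch of how build_lcm might be called, per the signature above: one coregionalized rbf term plus a non-coregionalized bias term (NC kernels act on the index-augmented input, hence input_dim + 1). kern.rbf and kern.bias are assumed to be the standard GPy kernel constructors of this era.

from GPy import kern
from GPy.util import multioutput

k = multioutput.build_lcm(input_dim=1, num_outputs=3,
                          CK=[kern.rbf(1)],    # multiplied by a coregionalise kernel
                          NC=[kern.bias(2)],   # added as-is, over the augmented input
                          W_columns=2)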
@@ -32,4 +32,15 @@ def std_norm_cdf(x):
     x = float(x)
     return weave.inline(code,arg_names=['x'],support_code=support_code)
 
+def inv_std_norm_cdf(x):
+    """
+    Inverse cumulative standard Gaussian distribution
+    Based on Winitzki, S. (2008)
+    """
+    z = 2*x - 1
+    ln1z2 = np.log(1-z**2)
+    a = 8*(np.pi - 3)/(3*np.pi*(4-np.pi))
+    b = 2/(np.pi * a) + ln1z2/2
+    inv_erf = np.sign(z) * np.sqrt( np.sqrt(b**2 - ln1z2/a) - b )
+    return np.sqrt(2) * inv_erf
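A quick accuracy check of the Winitzki approximation against scipy's exact inverse CDF; over the central range the absolute error is roughly at the 1e-3 level:

import numpy as np
from scipy import stats

p = np.linspace(.05, .95, 9)
z = 2*p - 1
ln1z2 = np.log(1 - z**2)
a = 8*(np.pi - 3)/(3*np.pi*(4 - np.pi))
b = 2/(np.pi*a) + ln1z2/2
approx = np.sqrt(2) * np.sign(z) * np.sqrt(np.sqrt(b**2 - ln1z2/a) - b)
print(np.max(np.abs(approx - stats.norm.ppf(p))))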