Mirror of https://github.com/SheffieldML/GPy.git — synced 2026-05-09 12:02:38 +02:00

Commit 1a02c65a61: "merging last master"
133 changed files with 13282 additions and 9562 deletions
@@ -17,7 +17,7 @@ before_install:
   - sudo ln -s /run/shm /dev/shm

 install:
-  - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
+  - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.9 scipy=0.16 matplotlib nose sphinx pip nose
   #- pip install .
   - python setup.py build_ext --inplace
   #--use-mirrors
@@ -1,7 +1 @@
-James Hensman
-Nicolo Fusi
-Ricardo Andrade
-Nicolas Durrande
-Alan Saul
-Max Zwiessele
-Neil D. Lawrence
+See contributors.
@@ -21,16 +21,18 @@ from . import plotting
 from .core import Model
 from .core.parameterization import Param, Parameterized, ObsAr

+from .__version__ import __version__

 #@nottest
 try:
     #Get rid of nose dependency by only ignoring if you have nose installed
     from nose.tools import nottest
     @nottest
-    def tests():
-        Tester(testing).test(verbose=10)
+    def tests(verbose=10):
+        Tester(testing).test(verbose=verbose)
 except:
-    def tests():
-        Tester(testing).test(verbose=10)
+    def tests(verbose=10):
+        Tester(testing).test(verbose=verbose)

 def load(file_path):
     """
GPy/__version__.py (new file, 1 line)
@@ -0,0 +1 @@
+__version__ = "0.8.8"
@@ -7,6 +7,6 @@ from .parameterization.param import Param, ParamConcatenation
 from .parameterization.observable_array import ObsAr

 from .gp import GP
-#from .svgp import SVGP
+from .svgp import SVGP
 from .sparse_gp import SparseGP
 from .mapping import *
GPy/core/gp.py (307 changed lines)
@@ -60,9 +60,11 @@ class GP(Model):
             self.normalizer.scale_by(Y)
             self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
             self.Y = Y
-        else:
+        elif isinstance(Y, np.ndarray):
             self.Y = ObsAr(Y)
             self.Y_normalized = self.Y
+        else:
+            self.Y = Y

         if Y.shape[0] != self.num_data:
             #There can be cases where we want inputs than outputs, for example if we have multiple latent
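For context, the Y normalization handled here (and undone later in predict_quantiles via inverse_mean / inverse_variance) amounts to a mean/variance rescaling of the outputs. A minimal numpy sketch of that idea, with made-up variable names rather than GPy's actual Normalizer class:

import numpy as np

Y = np.random.randn(50, 1) * 3.0 + 7.0       # toy outputs (illustrative)

# scale_by / normalize: centre and whiten the outputs
mean, std = Y.mean(0), Y.std(0)
Y_normalized = (Y - mean) / std

# inverse_mean / inverse_variance: map normalized predictions back
mu_norm, var_norm = Y_normalized.mean(0), Y_normalized.var(0)
mu = mu_norm * std + mean                    # inverse of the mean transform
var = var_norm * std ** 2                    # variances scale with std**2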
@@ -104,8 +106,23 @@ class GP(Model):
         self.link_parameter(self.likelihood)
         self.posterior = None

+        # The predictive variable to be used to predict using the posterior object's
+        # woodbury_vector and woodbury_inv is defined as predictive_variable
+        # as long as the posterior has the right woodbury entries.
+        # It is the input variable used for the covariance between
+        # X_star and the posterior of the GP.
+        # This is usually just a link to self.X (full GP) or self.Z (sparse GP).
+        # Make sure to name this variable and the predict functions will "just work"
+        # In maths the predictive variable is:
+        #   K_{xx} - K_{xp} W_{pp}^{-1} K_{px}
+        #   W_{pp} := \texttt{Woodbury inv}
+        #   p := _predictive_variable

-    def set_XY(self, X=None, Y=None, trigger_update=True):
+    @property
+    def _predictive_variable(self):
+        return self.X
+
+    def set_XY(self, X=None, Y=None):
         """
         Set the input / output data of the model
         This is useful if we wish to change our existing data but maintain the same model
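For reference, the posterior-prediction formula spelled out in the comment above can be sketched in plain numpy. The Woodbury names follow this diff; the toy RBF kernel, data and noise level below are illustrative assumptions, not GPy code.

import numpy as np

def rbf(A, B, variance=1.0, lengthscale=1.0):
    # toy squared-exponential kernel, for illustration only
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * d2 / lengthscale ** 2)

X = np.random.randn(20, 1)                       # training inputs (assumed)
Y = np.sin(X) + 0.1 * np.random.randn(20, 1)     # training outputs (assumed)
noise = 0.1

# For a full GP the predictive variable p is X, and W_pp = K_pp + noise*I,
# so woodbury_inv = W_pp^{-1} and woodbury_vector = W_pp^{-1} Y.
W = rbf(X, X) + noise * np.eye(X.shape[0])
woodbury_inv = np.linalg.inv(W)
woodbury_vector = woodbury_inv.dot(Y)

Xstar = np.linspace(-2, 2, 5)[:, None]
Kxp = rbf(Xstar, X)                              # K_{xp}
mu = Kxp.dot(woodbury_vector)                    # posterior mean
cov = rbf(Xstar, Xstar) - Kxp.dot(woodbury_inv).dot(Kxp.T)   # K_xx - K_xp W_pp^{-1} K_px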
@@ -115,7 +132,7 @@ class GP(Model):
         :param Y: output observations
         :type Y: np.ndarray
         """
-        if trigger_update: self.update_model(False)
+        self.update_model(False)
         if Y is not None:
             if self.normalizer is not None:
                 self.normalizer.scale_by(Y)
@@ -131,34 +148,33 @@ class GP(Model):
                 assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
                 self.unlink_parameter(self.X)
                 self.X = X
-                self.link_parameters(self.X)
+                self.link_parameter(self.X)
             else:
                 self.unlink_parameter(self.X)
                 from ..core import Param
                 self.X = Param('latent mean',X)
-                self.link_parameters(self.X)
+                self.link_parameter(self.X)
         else:
             self.X = ObsAr(X)
-        if trigger_update: self.update_model(True)
-        if trigger_update: self._trigger_params_changed()
+        self.update_model(True)

-    def set_X(self,X, trigger_update=True):
+    def set_X(self,X):
         """
         Set the input data of the model

         :param X: input observations
         :type X: np.ndarray
         """
-        self.set_XY(X=X, trigger_update=trigger_update)
+        self.set_XY(X=X)

-    def set_Y(self,Y, trigger_update=True):
+    def set_Y(self,Y):
         """
         Set the output data of the model

         :param X: output observations
         :type X: np.ndarray
         """
-        self.set_XY(Y=Y, trigger_update=trigger_update)
+        self.set_XY(Y=Y)

     def parameters_changed(self):
         """
@@ -181,7 +197,7 @@ class GP(Model):
         """
         return self._log_marginal_likelihood

-    def _raw_predict(self, _Xnew, full_cov=False, kern=None):
+    def _raw_predict(self, Xnew, full_cov=False, kern=None):
         """
         For making predictions, does not account for normalization or likelihood

@@ -197,24 +213,33 @@ class GP(Model):
        if kern is None:
            kern = self.kern

        Kx = kern.K(_Xnew, self.X).T
        WiKx = np.dot(self.posterior.woodbury_inv, Kx)
        Kx = kern.K(self._predictive_variable, Xnew)
        mu = np.dot(Kx.T, self.posterior.woodbury_vector)
        if len(mu.shape)==1:
            mu = mu.reshape(-1,1)
        if full_cov:
            Kxx = kern.K(_Xnew)
            var = Kxx - np.dot(Kx.T, WiKx)
            Kxx = kern.K(Xnew)
            if self.posterior.woodbury_inv.ndim == 2:
                var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
            elif self.posterior.woodbury_inv.ndim == 3: # Missing data
                var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
                from ..util.linalg import mdot
                for i in range(var.shape[2]):
                    var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
            var = var
        else:
            Kxx = kern.Kdiag(_Xnew)
            var = Kxx - np.sum(WiKx*Kx, 0)
            var = var.reshape(-1, 1)
            var[var<0.] = 0.
            Kxx = kern.Kdiag(Xnew)
            if self.posterior.woodbury_inv.ndim == 2:
                var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
            elif self.posterior.woodbury_inv.ndim == 3: # Missing data
                var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
                for i in range(var.shape[1]):
                    var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
            var = var
        #add in the mean function
        if self.mean_function is not None:
            mu += self.mean_function.f(Xnew)

        #force mu to be a column vector
        if len(mu.shape)==1: mu = mu[:,None]

        #add the mean function in
        if not self.mean_function is None:
            mu += self.mean_function.f(_Xnew)
        return mu, var

    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@@ -247,7 +272,7 @@ class GP(Model):
         mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
         return mean, var

-    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
+    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None):
         """
         Get the predictive quantiles around the prediction at X

@@ -255,10 +280,12 @@ class GP(Model):
         :type X: np.ndarray (Xnew x self.input_dim)
         :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
         :type quantiles: tuple
+        :param kern: optional kernel to use for prediction
+        :type predict_kw: dict
         :returns: list of quantiles for each X and predictive quantiles for interval combination
         :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
         """
-        m, v = self._raw_predict(X, full_cov=False)
+        m, v = self._raw_predict(X, full_cov=False, kern=kern)
         if self.normalizer is not None:
             m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
         return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
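A minimal sketch of what Gaussian predictive quantiles look like, assuming a Gaussian likelihood (GPy's likelihood.predictive_quantiles handles the general case); the percentile convention (2.5, 97.5) matches the default above, and the mean/variance values are made up.

import numpy as np
from scipy.stats import norm

m = np.array([[0.0], [1.0]])          # predictive means (toy values)
v = np.array([[0.5], [2.0]])          # predictive variances, incl. noise (toy values)

quantiles = (2.5, 97.5)
# each quantile q (in percent) maps to mean + sqrt(var) * Phi^{-1}(q/100)
qs = [m + np.sqrt(v) * norm.ppf(q / 100.) for q in quantiles]
lower, upper = qs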
@@ -292,6 +319,120 @@ class GP(Model):
        return dmu_dX, dv_dX


    def predict_jacobian(self, Xnew, kern=None, full_cov=True):
        """
        Compute the derivatives of the posterior of the GP.

        Given a set of points at which to predict X* (size [N*,Q]), compute the
        mean and variance of the derivative. Resulting arrays are sized:

         dL_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
           Note that this is the mean and variance of the derivative,
           not the derivative of the mean and variance! (See predictive_gradients for that)

         dv_dX* -- [N*, Q], (since all outputs have the same variance)
           If there is missing data, it is not implemented for now, but
           there will be one output variance per output dimension.

        :param X: The points at which to get the predictive gradients.
        :type X: np.ndarray (Xnew x self.input_dim)
        :param kern: The kernel to compute the jacobian for.
        :param boolean full_cov: whether to return the full covariance of the jacobian.

        :returns: dmu_dX, dv_dX
        :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]

        Note: We always return sum in input_dim gradients, as the off-diagonals
            in the input_dim are not needed for further calculations.
            This is a compromise for increase in speed. Mathematically the jacobian would
            have another dimension in Q.
        """
        if kern is None:
            kern = self.kern

        mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))

        for i in range(self.output_dim):
            mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)

        dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
        for i in range(self._predictive_variable.shape[0]):
            dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])

        if full_cov:
            dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
        else:
            dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)

        def compute_cov_inner(wi):
            if full_cov:
                # full covariance gradients:
                var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
            else:
                var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
            return var_jac

        if self.posterior.woodbury_inv.ndim == 3: # Missing data:
            if full_cov:
                var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],self.output_dim))
                for d in range(self.posterior.woodbury_inv.shape[2]):
                    var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
            else:
                var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
                for d in range(self.posterior.woodbury_inv.shape[2]):
                    var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
        else:
            var_jac = compute_cov_inner(self.posterior.woodbury_inv)
        return mean_jac, var_jac
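As a sanity check on the quantity computed above: the mean Jacobian of a GP posterior is just dK(x*, X)/dx* times the Woodbury vector. A small numpy sketch under the same toy-RBF assumptions as earlier (not GPy code) verifies it against finite differences.

import numpy as np

def rbf(A, B, ls=1.0):
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2 / ls ** 2)

X = np.random.randn(15, 2)
Y = np.sin(X[:, :1])
woodbury_vector = np.linalg.solve(rbf(X, X) + 0.1 * np.eye(15), Y)

def mean(xstar):
    return rbf(xstar[None, :], X).dot(woodbury_vector)[0, 0]

xstar = np.zeros(2)
# analytic jacobian of the posterior mean: dK(x*, x_n)/dx*_q = -(x*_q - x_nq)/ls**2 * K(x*, x_n)
K = rbf(xstar[None, :], X)                       # 1 x N
dK = -(xstar[None, :] - X) * K.T                 # N x Q (ls = 1)
mean_jac = dK.T.dot(woodbury_vector).ravel()     # Q

# central finite differences agree to ~1e-6
eps = 1e-6
fd = np.array([(mean(xstar + eps * e) - mean(xstar - eps * e)) / (2 * eps)
               for e in np.eye(2)])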
    def predict_wishard_embedding(self, Xnew, kern=None, mean=True, covariance=True):
        """
        Predict the wishard embedding G of the GP. This is the density of the
        input of the GP defined by the probabilistic function mapping f.
        G = J_mean.T*J_mean + output_dim*J_cov.

        :param array-like Xnew: The points at which to evaluate the magnification.
        :param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification.

        Supplying only a part of the learning kernel gives insights into the density
        of the specific kernel part of the input function. E.g. one can see how dense the
        linear part of a kernel is compared to the non-linear part etc.
        """
        if kern is None:
            kern = self.kern

        mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
        mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
        Sigma = np.zeros(mumuT.shape)
        if var_jac.ndim == 3:
            Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
        else:
            Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = self.output_dim*var_jac
        G = 0.
        if mean:
            G += mumuT
        if covariance:
            G += Sigma
        return G
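To make G = J_mean.T*J_mean + output_dim*J_cov concrete, here is a small numpy sketch for a single input point with made-up Jacobian values (D outputs, Q inputs); sqrt(det(G)) is what predict_magnification computes below.

import numpy as np

Q, D = 2, 3                              # input and output dimensions (toy values)
J_mean = np.random.randn(Q, D)           # posterior mean Jacobian at one point
J_var = np.abs(np.random.randn(Q))       # per-input-dimension Jacobian variance

G = J_mean.dot(J_mean.T) + D * np.diag(J_var)    # Q x Q metric at this point
magnification = np.sqrt(np.linalg.det(G))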
    def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
        """
        Predict the magnification factor as

        sqrt(det(G))

        for each point N in Xnew
        """
        G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
        from ..util.linalg import jitchol
        mag = np.empty(Xnew.shape[0])
        for n in range(Xnew.shape[0]):
            try:
                mag[n] = np.sqrt(np.exp(2*np.sum(np.log(np.diag(jitchol(G[n, :, :]))))))
            except:
                mag[n] = np.sqrt(np.linalg.det(G[n, :, :]))
        return mag

    def posterior_samples_f(self,X,size=10, full_cov=True):
        """
        Samples the posterior GP at the points X.
@ -395,8 +536,8 @@ class GP(Model):
|
|||
def plot(self, plot_limits=None, which_data_rows='all',
|
||||
which_data_ycols='all', fixed_inputs=[],
|
||||
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||
plot_raw=False,
|
||||
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None):
|
||||
plot_raw=False, linecol=None,fillcol=None, Y_metadata=None,
|
||||
data_symbol='kx', predict_kw=None, plot_training_data=True, samples_y=0, apply_link=False):
|
||||
"""
|
||||
Plot the posterior of the GP.
|
||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||
|
|
@ -419,7 +560,7 @@ class GP(Model):
|
|||
:param levels: number of levels to plot in a contour plot.
|
||||
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
|
||||
:type levels: int
|
||||
:param samples: the number of a posteriori samples to plot
|
||||
:param samples: the number of a posteriori samples to plot, p(f*|y)
|
||||
:type samples: int
|
||||
:param fignum: figure to plot on.
|
||||
:type fignum: figure number
|
||||
|
|
@ -433,6 +574,12 @@ class GP(Model):
|
|||
:type Y_metadata: dict
|
||||
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
|
||||
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
|
||||
:param plot_training_data: whether or not to plot the training points
|
||||
:type plot_training_data: boolean
|
||||
:param samples_y: the number of a posteriori samples to plot, p(y*|y)
|
||||
:type samples_y: int
|
||||
:param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*, when plotting posterior samples f
|
||||
:type apply_link: boolean
|
||||
"""
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import models_plots
|
||||
|
|
@ -445,7 +592,103 @@ class GP(Model):
|
|||
which_data_ycols, fixed_inputs,
|
||||
levels, samples, fignum, ax, resolution,
|
||||
plot_raw=plot_raw, Y_metadata=Y_metadata,
|
||||
data_symbol=data_symbol, predict_kw=predict_kw, **kw)
|
||||
data_symbol=data_symbol, predict_kw=predict_kw,
|
||||
plot_training_data=plot_training_data, samples_y=samples_y, apply_link=apply_link, **kw)
|
||||
|
||||
|
||||
def plot_data(self, which_data_rows='all',
|
||||
which_data_ycols='all', visible_dims=None,
|
||||
fignum=None, ax=None, data_symbol='kx'):
|
||||
"""
|
||||
Plot the training data
|
||||
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
|
||||
|
||||
Can plot only part of the data
|
||||
using which_data_rows and which_data_ycols.
|
||||
|
||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
|
||||
:type plot_limits: np.array
|
||||
:param which_data_rows: which of the training data to plot (default all)
|
||||
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
|
||||
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
|
||||
:type which_data_ycols: 'all' or a list of integers
|
||||
:param visible_dims: an array specifying the input dimensions to plot (maximum two)
|
||||
:type visible_dims: a numpy array
|
||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||
:type resolution: int
|
||||
:param levels: number of levels to plot in a contour plot.
|
||||
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
|
||||
:type levels: int
|
||||
:param samples: the number of a posteriori samples to plot, p(f*|y)
|
||||
:type samples: int
|
||||
:param fignum: figure to plot on.
|
||||
:type fignum: figure number
|
||||
:param ax: axes to plot on.
|
||||
:type ax: axes handle
|
||||
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
|
||||
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
|
||||
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
|
||||
:type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
|
||||
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
|
||||
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
|
||||
"""
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import models_plots
|
||||
kw = {}
|
||||
return models_plots.plot_data(self, which_data_rows,
|
||||
which_data_ycols, visible_dims,
|
||||
fignum, ax, data_symbol, **kw)
|
||||
|
||||
|
||||
def errorbars_trainset(self, which_data_rows='all',
|
||||
which_data_ycols='all', fixed_inputs=[], fignum=None, ax=None,
|
||||
linecol=None, data_symbol='kx', predict_kw=None, plot_training_data=True,lw=None):
|
||||
|
||||
"""
|
||||
Plot the posterior error bars corresponding to the training data
|
||||
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
|
||||
|
||||
Can plot only part of the data
|
||||
using which_data_rows and which_data_ycols.
|
||||
|
||||
:param which_data_rows: which of the training data to plot (default all)
|
||||
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
|
||||
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
|
||||
:type which_data_rows: 'all' or a list of integers
|
||||
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
||||
:type fixed_inputs: a list of tuples
|
||||
:param fignum: figure to plot on.
|
||||
:type fignum: figure number
|
||||
:param ax: axes to plot on.
|
||||
:type ax: axes handle
|
||||
:param plot_training_data: whether or not to plot the training points
|
||||
:type plot_training_data: boolean
|
||||
"""
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import models_plots
|
||||
kw = {}
|
||||
if lw is not None:
|
||||
kw['lw'] = lw
|
||||
return models_plots.errorbars_trainset(self, which_data_rows, which_data_ycols, fixed_inputs,
|
||||
fignum, ax, linecol, data_symbol,
|
||||
predict_kw, plot_training_data, **kw)
|
||||
|
||||
|
||||
def plot_magnification(self, labels=None, which_indices=None,
|
||||
resolution=50, ax=None, marker='o', s=40,
|
||||
fignum=None, legend=True,
|
||||
plot_limits=None,
|
||||
aspect='auto', updates=False, plot_inducing=True, kern=None, **kwargs):
|
||||
|
||||
import sys
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import dim_reduction_plots
|
||||
|
||||
return dim_reduction_plots.plot_magnification(self, labels, which_indices,
|
||||
resolution, ax, marker, s,
|
||||
fignum, plot_inducing, legend,
|
||||
plot_limits, aspect, updates, **kwargs)
|
||||
|
||||
|
||||
def input_sensitivity(self, summarize=True):
|
||||
"""
|
||||
|
|
|
|||
|
|
@@ -32,7 +32,7 @@ class Bijective_mapping(Mapping):
     also back from f to X. The inverse mapping is called g().
     """
     def __init__(self, input_dim, output_dim, name='bijective_mapping'):
-        super(Bijective_apping, self).__init__(name=name)
+        super(Bijective_mapping, self).__init__(name=name)

     def g(self, f):
         """Inverse mapping from output domain of the function to the inputs."""
|
|
|
|||
|
|
@ -180,6 +180,7 @@ class Param(Parameterizable, ObsAr):
|
|||
import copy
|
||||
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
|
||||
return s
|
||||
|
||||
def _setup_observers(self):
|
||||
"""
|
||||
Setup the default observers
|
||||
|
|
|
|||
|
|
@ -197,9 +197,10 @@ class Parameterized(Parameterizable):
|
|||
raise RuntimeError("{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param)))
|
||||
|
||||
start = sum([p.size for p in self.parameters[:param._parent_index_]])
|
||||
self._remove_parameter_name(param)
|
||||
self.size -= param.size
|
||||
del self.parameters[param._parent_index_]
|
||||
self._remove_parameter_name(param)
|
||||
|
||||
|
||||
param._disconnect_parent()
|
||||
param.remove_observer(self, self._pass_through_notify_observers)
|
||||
|
|
@ -315,7 +316,7 @@ class Parameterized(Parameterizable):
|
|||
param[:] = val; return
|
||||
except AttributeError:
|
||||
pass
|
||||
object.__setattr__(self, name, val);
|
||||
return object.__setattr__(self, name, val);
|
||||
|
||||
#===========================================================================
|
||||
# Pickling
|
||||
|
|
|
|||
|
|
@ -366,6 +366,7 @@ class InverseGamma(Gamma):
|
|||
def rvs(self, n):
|
||||
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
|
||||
|
||||
|
||||
class DGPLVM_KFDA(Prior):
|
||||
"""
|
||||
Implementation of the Discriminative Gaussian Process Latent Variable function using
|
||||
|
|
@ -512,6 +513,7 @@ class DGPLVM_KFDA(Prior):
|
|||
self.A = self.compute_A(lst_ni)
|
||||
self.x_shape = x_shape
|
||||
|
||||
|
||||
class DGPLVM(Prior):
|
||||
"""
|
||||
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
|
||||
|
|
@ -669,7 +671,7 @@ class DGPLVM(Prior):
|
|||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
# sb_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
||||
|
|
@ -903,7 +905,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
|||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
|
||||
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
|
||||
|
||||
# This function calculates derivative of the log of prior function
|
||||
|
|
@ -927,7 +929,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
|||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
|
||||
Sb_inv_N_trans = np.transpose(Sb_inv_N)
|
||||
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
|
||||
Sw_trans = np.transpose(Sw)
|
||||
|
|
@ -1198,6 +1200,7 @@ class DGPLVM_T(Prior):
|
|||
|
||||
|
||||
|
||||
|
||||
class HalfT(Prior):
|
||||
"""
|
||||
Implementation of the half student t probability function, coupled with random variables.
|
||||
|
|
@ -1208,15 +1211,17 @@ class HalfT(Prior):
|
|||
"""
|
||||
domain = _POSITIVE
|
||||
_instances = []
|
||||
def __new__(cls, A, nu): # Singleton:
|
||||
|
||||
def __new__(cls, A, nu): # Singleton:
|
||||
if cls._instances:
|
||||
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
for instance in cls._instances:
|
||||
if instance().A == A and instance().nu == nu:
|
||||
return instance()
|
||||
return instance()
|
||||
o = super(Prior, cls).__new__(cls, A, nu)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
|
||||
def __init__(self, A, nu):
|
||||
self.A = float(A)
|
||||
self.nu = float(nu)
|
||||
|
|
@ -1225,37 +1230,81 @@ class HalfT(Prior):
|
|||
def __str__(self):
|
||||
return "hT({:.2g}, {:.2g})".format(self.A, self.nu)
|
||||
|
||||
def lnpdf(self,theta):
|
||||
return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) )
|
||||
def lnpdf(self, theta):
|
||||
return (theta > 0) * (self.constant - .5*(self.nu + 1) * np.log(1. + (1./self.nu) * (theta/self.A)**2))
|
||||
|
||||
#theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
|
||||
#lnpdfs = np.zeros_like(theta)
|
||||
#theta = np.array([theta])
|
||||
#above_zero = theta.flatten()>1e-6
|
||||
#v = self.nu
|
||||
#sigma2=self.A
|
||||
#stop
|
||||
#lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
|
||||
# - gammaln(v * 0.5)
|
||||
# - 0.5*np.log(sigma2 * v * np.pi)
|
||||
# - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
|
||||
#)
|
||||
#return lnpdfs
|
||||
# theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
|
||||
# lnpdfs = np.zeros_like(theta)
|
||||
# theta = np.array([theta])
|
||||
# above_zero = theta.flatten()>1e-6
|
||||
# v = self.nu
|
||||
# sigma2=self.A
|
||||
# stop
|
||||
# lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
|
||||
# - gammaln(v * 0.5)
|
||||
# - 0.5*np.log(sigma2 * v * np.pi)
|
||||
# - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
|
||||
# )
|
||||
# return lnpdfs
|
||||
|
||||
def lnpdf_grad(self,theta):
|
||||
theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
|
||||
def lnpdf_grad(self, theta):
|
||||
theta = theta if isinstance(theta, np.ndarray) else np.array([theta])
|
||||
grad = np.zeros_like(theta)
|
||||
above_zero = theta>1e-6
|
||||
above_zero = theta > 1e-6
|
||||
v = self.nu
|
||||
sigma2=self.A
|
||||
sigma2 = self.A
|
||||
grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2)
|
||||
return grad
|
||||
|
||||
def rvs(self, n):
|
||||
#return np.random.randn(n) * self.sigma + self.mu
|
||||
from scipy.stats import t
|
||||
#[np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
|
||||
ret = t.rvs(self.nu,loc=0,scale=self.A, size=n)
|
||||
ret[ret<0] = 0
|
||||
return ret
|
||||
# return np.random.randn(n) * self.sigma + self.mu
|
||||
from scipy.stats import t
|
||||
# [np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
|
||||
ret = t.rvs(self.nu, loc=0, scale=self.A, size=n)
|
||||
ret[ret < 0] = 0
|
||||
return ret
|
||||
|
||||
|
||||
class Exponential(Prior):
|
||||
"""
|
||||
Implementation of the Exponential probability function,
|
||||
coupled with random variables.
|
||||
|
||||
:param l: shape parameter
|
||||
|
||||
"""
|
||||
domain = _POSITIVE
|
||||
_instances = []
|
||||
|
||||
def __new__(cls, l): # Singleton:
|
||||
if cls._instances:
|
||||
cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
for instance in cls._instances:
|
||||
if instance().l == l:
|
||||
return instance()
|
||||
o = super(Exponential, cls).__new__(cls, l)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
|
||||
def __init__(self, l):
|
||||
self.l = l
|
||||
|
||||
def __str__(self):
|
||||
return "Exp({:.2g})".format(self.l)
|
||||
|
||||
def summary(self):
|
||||
ret = {"E[x]": 1. / self.l,
|
||||
"E[ln x]": np.nan,
|
||||
"var[x]": 1. / self.l**2,
|
||||
"Entropy": 1. - np.log(self.l),
|
||||
"Mode": 0.}
|
||||
return ret
|
||||
|
||||
def lnpdf(self, x):
|
||||
return np.log(self.l) - self.l * x
|
||||
|
||||
def lnpdf_grad(self, x):
|
||||
return - self.l
|
||||
|
||||
def rvs(self, n):
|
||||
return np.random.exponential(scale=self.l, size=n)
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ class Transformation(object):
|
|||
import matplotlib.pyplot as plt
|
||||
from ...plotting.matplot_dep import base_plots
|
||||
x = np.linspace(-8,8)
|
||||
base_plots.meanplot(x, self.f(x),axes=axes*args,**kw)
|
||||
base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw)
|
||||
axes = plt.gca()
|
||||
axes.set_xlabel(xlabel)
|
||||
axes.set_ylabel(ylabel)
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ class SparseGP(GP):
|
|||
else:
|
||||
#inference_method = ??
|
||||
raise NotImplementedError("what to do what to do?")
|
||||
print("defaulting to ", inference_method, "for latent function inference")
|
||||
print(("defaulting to ", inference_method, "for latent function inference"))
|
||||
|
||||
self.Z = Param('inducing inputs', Z)
|
||||
self.num_inducing = Z.shape[0]
|
||||
|
|
@ -60,6 +60,10 @@ class SparseGP(GP):
|
|||
self.link_parameter(self.Z, index=0)
|
||||
self.posterior = None
|
||||
|
||||
@property
|
||||
def _predictive_variable(self):
|
||||
return self.Z
|
||||
|
||||
def has_uncertain_inputs(self):
|
||||
return isinstance(self.X, VariationalPosterior)
|
||||
|
||||
|
|
@ -117,45 +121,48 @@ class SparseGP(GP):
|
|||
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of,
|
||||
we take only the diagonal elements across N.
|
||||
|
||||
For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
|
||||
return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).
|
||||
This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'.
|
||||
For uncertain inputs, the SparseGP bound produces cannot predict the full covariance matrix full_cov for now.
|
||||
The implementation of that will follow. However, for each dimension the
|
||||
covariance changes, so if full_cov is False (standard), we return the variance
|
||||
for each dimension [NxD].
|
||||
"""
|
||||
|
||||
if kern is None: kern = self.kern
|
||||
|
||||
if not isinstance(Xnew, VariationalPosterior):
|
||||
Kx = kern.K(self.Z, Xnew)
|
||||
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
||||
if full_cov:
|
||||
Kxx = kern.K(Xnew)
|
||||
if self.posterior.woodbury_inv.ndim == 2:
|
||||
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
|
||||
elif self.posterior.woodbury_inv.ndim == 3:
|
||||
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
|
||||
for i in range(var.shape[2]):
|
||||
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
|
||||
var = var
|
||||
else:
|
||||
Kxx = kern.Kdiag(Xnew)
|
||||
if self.posterior.woodbury_inv.ndim == 2:
|
||||
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
|
||||
elif self.posterior.woodbury_inv.ndim == 3:
|
||||
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
|
||||
for i in range(var.shape[1]):
|
||||
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
|
||||
var = var
|
||||
#add in the mean function
|
||||
if self.mean_function is not None:
|
||||
mu += self.mean_function.f(Xnew)
|
||||
# Kx = kern.K(self._predictive_variable, Xnew)
|
||||
# mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
||||
# if full_cov:
|
||||
# Kxx = kern.K(Xnew)
|
||||
# if self.posterior.woodbury_inv.ndim == 2:
|
||||
# var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
|
||||
# elif self.posterior.woodbury_inv.ndim == 3:
|
||||
# var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
|
||||
# for i in range(var.shape[2]):
|
||||
# var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
|
||||
# var = var
|
||||
# else:
|
||||
# Kxx = kern.Kdiag(Xnew)
|
||||
# if self.posterior.woodbury_inv.ndim == 2:
|
||||
# var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
|
||||
# elif self.posterior.woodbury_inv.ndim == 3:
|
||||
# var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
|
||||
# for i in range(var.shape[1]):
|
||||
# var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
|
||||
# var = var
|
||||
# #add in the mean function
|
||||
# if self.mean_function is not None:
|
||||
# mu += self.mean_function.f(Xnew)
|
||||
mu, var = super(SparseGP, self)._raw_predict(Xnew, full_cov, kern)
|
||||
else:
|
||||
psi0_star = kern.psi0(self.Z, Xnew)
|
||||
psi1_star = kern.psi1(self.Z, Xnew)
|
||||
psi0_star = kern.psi0(self._predictive_variable, Xnew)
|
||||
psi1_star = kern.psi1(self._predictive_variable, Xnew)
|
||||
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
|
||||
la = self.posterior.woodbury_vector
|
||||
mu = np.dot(psi1_star, la) # TODO: dimensions?
|
||||
|
||||
if full_cov:
|
||||
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
|
||||
var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
|
||||
di = np.diag_indices(la.shape[1])
|
||||
else:
|
||||
|
|
@ -163,7 +170,7 @@ class SparseGP(GP):
|
|||
|
||||
for i in range(Xnew.shape[0]):
|
||||
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
|
||||
psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
|
||||
psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
|
||||
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
|
||||
|
||||
var_ = mdot(la.T, tmp, la)
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ class SparseGP_MPI(SparseGP):
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False):
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp', Y_metadata=None, mpi_comm=None, normalizer=False):
|
||||
self._IN_OPTIMIZATION_ = False
|
||||
if mpi_comm != None:
|
||||
if inference_method is None:
|
||||
|
|
|
|||
|
|
@@ -1,11 +1,11 @@
 # Copyright (c) 2014, James Hensman, Alex Matthews
-# Distributed under the terms of the GNU General public License, see LICENSE.txt
+# Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
 from ..util import choleskies
 from .sparse_gp import SparseGP
 from .parameterization.param import Param
-from ..inference.latent_function_inference import SVGP as svgp_inf
+from ..inference.latent_function_inference.svgp import SVGP as svgp_inf


 class SVGP(SparseGP):
|
|
|||
|
|
@ -24,7 +24,6 @@ class VerboseOptimization(object):
|
|||
self.model.add_observer(self, self.print_status)
|
||||
self.status = 'running'
|
||||
self.clear = clear_after_finish
|
||||
self.deltat = .2
|
||||
|
||||
self.update()
|
||||
|
||||
|
|
@ -80,6 +79,7 @@ class VerboseOptimization(object):
|
|||
|
||||
def __enter__(self):
|
||||
self.start = time.time()
|
||||
self._time = self.start
|
||||
return self
|
||||
|
||||
def print_out(self, seconds):
|
||||
|
|
@ -143,12 +143,12 @@ class VerboseOptimization(object):
|
|||
def print_status(self, me, which=None):
|
||||
self.update()
|
||||
|
||||
seconds = time.time()-self.start
|
||||
t = time.time()
|
||||
seconds = t-self.start
|
||||
#sys.stdout.write(" "*len(self.message))
|
||||
self.deltat += seconds
|
||||
if self.deltat > .2:
|
||||
if t-self._time > .3 or seconds < .3:
|
||||
self.print_out(seconds)
|
||||
self.deltat = 0
|
||||
self._time = t
|
||||
|
||||
self.iteration += 1
|
||||
|
||||
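The timing change above throttles the optimizer status line to at most one refresh every 0.3 s (while still printing freely during the first 0.3 s). A tiny standalone sketch of that pattern, unrelated to GPy's VerboseOptimization class:

import time

class Throttled(object):
    """Print a status line at most every 0.3 s (but always during the first 0.3 s)."""
    def __init__(self):
        self.start = time.time()
        self._time = self.start

    def maybe_print(self, message):
        t = time.time()
        seconds = t - self.start
        if t - self._time > .3 or seconds < .3:
            print("%s (%.1fs elapsed)" % (message, seconds))
            self._time = t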
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
import numpy as np
|
||||
try:
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
except:
|
||||
pass
|
||||
import GPy
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ def student_t_approx(optimize=True, plot=True):
|
|||
debug=True
|
||||
if debug:
|
||||
m4.optimize(messages=1)
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
pb.plot(m4.X, m4.inference_method.f_hat)
|
||||
pb.plot(m4.X, m4.Y, 'rx')
|
||||
m4.plot()
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
Gaussian Processes regression examples
|
||||
"""
|
||||
try:
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
except:
|
||||
pass
|
||||
import numpy as np
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ from .expectation_propagation_dtc import EPDTC
|
|||
from .dtc import DTC
|
||||
from .fitc import FITC
|
||||
from .var_dtc_parallel import VarDTC_minibatch
|
||||
#from .svgp import SVGP
|
||||
from .var_gauss import VarGauss
|
||||
|
||||
# class FullLatentFunctionData(object):
|
||||
#
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
import numpy as np
|
||||
from ...core import Model
|
||||
from ...core.parameterization import variational
|
||||
from ...util.linalg import tdot
|
||||
from GPy.core.parameterization.variational import VariationalPosterior
|
||||
|
||||
def infer_newX(model, Y_new, optimize=True, init='L2'):
|
||||
"""
|
||||
|
|
@ -60,18 +62,19 @@ class InferenceX(Model):
|
|||
# self.kern.GPU(True)
|
||||
from copy import deepcopy
|
||||
self.posterior = deepcopy(model.posterior)
|
||||
if hasattr(model, 'variational_prior'):
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
self.uncertain_input = True
|
||||
from ...models.ss_gplvm import IBPPrior
|
||||
from ...models.ss_mrd import IBPPrior_SSMRD
|
||||
if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
|
||||
from ...core.parameterization.variational import SpikeAndSlabPrior
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=05,learnPi=False, group_spike=False)
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
|
||||
else:
|
||||
self.variational_prior = model.variational_prior.copy()
|
||||
else:
|
||||
self.uncertain_input = False
|
||||
if hasattr(model, 'inducing_inputs'):
|
||||
if hasattr(model, 'Z'):
|
||||
self.sparse_gp = True
|
||||
self.Z = model.Z.copy()
|
||||
else:
|
||||
|
|
@ -125,13 +128,13 @@ class InferenceX(Model):
|
|||
wv = wv[:,self.valid_dim]
|
||||
output_dim = self.valid_dim.sum()
|
||||
if self.ninan is not None:
|
||||
self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - np.einsum('md,od->mo',wv, wv)[:, :, None]).sum(-1)
|
||||
self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - tdot(wv)[:, :, None]).sum(-1)
|
||||
else:
|
||||
self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))
|
||||
self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - tdot(wv))
|
||||
self.dL_dpsi1 = beta*np.dot(self.Y[:,self.valid_dim], wv.T)
|
||||
self.dL_dpsi0 = - beta/2.* np.ones(self.Y.shape[0])
|
||||
else:
|
||||
self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2.
|
||||
self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - tdot(wv))/2. #np.einsum('md,od->mo',wv, wv)
|
||||
self.dL_dpsi1 = beta*np.dot(self.Y, wv.T)
|
||||
self.dL_dpsi0 = -beta/2.*output_dim* np.ones(self.Y.shape[0])
|
||||
|
||||
|
|
|
|||
|
|
@@ -172,6 +172,7 @@ class Laplace(LatentFunctionInference):
         def obj(Ki_f, f):
             ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
             if np.isnan(ll):
+                import ipdb; ipdb.set_trace() # XXX BREAKPOINT
                 return -np.inf
             else:
                 return ll
|
|
|
|||
|
|
@ -64,9 +64,7 @@ class VarDTC(LatentFunctionInference):
|
|||
def get_VVTfactor(self, Y, prec):
|
||||
return Y * prec # TODO chache this, and make it effective
|
||||
|
||||
|
||||
|
||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
|
||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
|
||||
|
||||
_, output_dim = Y.shape
|
||||
uncertain_inputs = isinstance(X, VariationalPosterior)
|
||||
|
|
@ -95,17 +93,28 @@ class VarDTC(LatentFunctionInference):
|
|||
|
||||
# The rather complex computations of A, and the psi stats
|
||||
if uncertain_inputs:
|
||||
psi0 = kern.psi0(Z, X)
|
||||
psi1 = kern.psi1(Z, X)
|
||||
if psi0 is None:
|
||||
psi0 = kern.psi0(Z, X)
|
||||
if psi1 is None:
|
||||
psi1 = kern.psi1(Z, X)
|
||||
if het_noise:
|
||||
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
|
||||
if psi2 is None:
|
||||
assert len(psi2.shape) == 3 # Need to have not summed out N
|
||||
#FIXME: Need testing
|
||||
psi2_beta = np.sum([psi2[X[i:i+1,:], :, :] * beta_i for i,beta_i in enumerate(beta)],0)
|
||||
else:
|
||||
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
|
||||
else:
|
||||
psi2_beta = kern.psi2(Z,X) * beta
|
||||
if psi2 is None:
|
||||
psi2 = kern.psi2(Z,X)
|
||||
psi2_beta = psi2 * beta
|
||||
LmInv = dtrtri(Lm)
|
||||
A = LmInv.dot(psi2_beta.dot(LmInv.T))
|
||||
else:
|
||||
psi0 = kern.Kdiag(X)
|
||||
psi1 = kern.K(X, Z)
|
||||
if psi0 is None:
|
||||
psi0 = kern.Kdiag(X)
|
||||
if psi1 is None:
|
||||
psi1 = kern.K(X, Z)
|
||||
if het_noise:
|
||||
tmp = psi1 * (np.sqrt(beta))
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -172,17 +172,22 @@ class VarDTC_minibatch(LatentFunctionInference):
|
|||
if not np.isfinite(Kmm).all():
|
||||
print(Kmm)
|
||||
Lm = jitchol(Kmm)
|
||||
LmInv = dtrtri(Lm)
|
||||
|
||||
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
|
||||
LmInvPsi2LmInvT = LmInv.dot(psi2_full.dot(LmInv.T))
|
||||
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
|
||||
LL = jitchol(Lambda)
|
||||
LLInv = dtrtri(LL)
|
||||
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
|
||||
b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
|
||||
bbt = np.square(b).sum()
|
||||
v = dtrtrs(Lm,dtrtrs(LL,b,trans=1)[0],trans=1)[0]
|
||||
LmLLInv = LLInv.dot(LmInv)
|
||||
|
||||
tmp = -backsub_both_sides(LL, tdot(b)+output_dim*np.eye(input_dim), transpose='left')
|
||||
dL_dpsi2R = backsub_both_sides(Lm, tmp+output_dim*np.eye(input_dim), transpose='left')/2.
|
||||
b = psi1Y_full.dot(LmLLInv.T)
|
||||
bbt = np.square(b).sum()
|
||||
v = b.dot(LmLLInv).T
|
||||
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
|
||||
|
||||
tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
|
||||
dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.
|
||||
|
||||
# Cache intermediate results
|
||||
self.midRes['dL_dpsi2R'] = dL_dpsi2R
|
||||
|
|
@ -201,7 +206,7 @@ class VarDTC_minibatch(LatentFunctionInference):
|
|||
# Compute dL_dKmm
|
||||
#======================================================================
|
||||
|
||||
dL_dKmm = dL_dpsi2R - output_dim*backsub_both_sides(Lm, LmInvPsi2LmInvT, transpose='left')/2.
|
||||
dL_dKmm = dL_dpsi2R - output_dim*LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.
|
||||
|
||||
#======================================================================
|
||||
# Compute the Posterior distribution of inducing points p(u|Y)
|
||||
|
|
|
|||
GPy/inference/latent_function_inference/var_gauss.py (new file, 69 lines)
@@ -0,0 +1,69 @@
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv
from .posterior import Posterior
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)

class VarGauss(LatentFunctionInference):
    """
    The Variational Gaussian Approximation revisited

    @article{Opper:2009,
        title = {The Variational Gaussian Approximation Revisited},
        author = {Opper, Manfred and Archambeau, C{\'e}dric},
        journal = {Neural Comput.},
        year = {2009},
        pages = {786--792},
    }
    """
    def __init__(self, alpha, beta):
        """
        :param alpha: GPy.core.Param varational parameter
        :param beta: GPy.core.Param varational parameter
        """
        self.alpha, self.beta = alpha, beta

    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
        if mean_function is not None:
            raise NotImplementedError
        num_data, output_dim = Y.shape
        assert output_dim ==1, "Only one output supported"

        K = kern.K(X)
        m = K.dot(self.alpha)
        KB = K*self.beta[:, None]
        BKB = KB*self.beta[None, :]
        A = np.eye(num_data) + BKB
        Ai, LA, _, Alogdet = pdinv(A)
        Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
        var = np.diag(Sigma).reshape(-1,1)

        F, dF_dm, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, m, var, Y_metadata=Y_metadata)
        if dF_dthetaL is not None:
            dL_dthetaL = dF_dthetaL.sum(1).sum(1)
        else:
            dL_dthetaL = np.array([])
        dF_da = np.dot(K, dF_dm)
        SigmaB = Sigma*self.beta
        #dF_db_ = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
        dF_db = -2*np.sum(Sigma**2 * (dF_dv * self.beta), 0)
        #assert np.allclose(dF_db, dF_db_)

        KL = 0.5*(Alogdet + np.trace(Ai) - num_data + np.sum(m*self.alpha))
        dKL_da = m
        A_A2 = Ai - Ai.dot(Ai)
        dKL_db = np.diag(np.dot(KB.T, A_A2))
        log_marginal = F.sum() - KL
        self.alpha.gradient = dF_da - dKL_da
        self.beta.gradient = dF_db - dKL_db

        # K-gradients
        dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
        tmp = Ai*self.beta[:, None]/self.beta[None, :]
        dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)

        return Posterior(mean=m, cov=Sigma ,K=K),\
               log_marginal,\
               {'dL_dK':dF_dK-dKL_dK, 'dL_dthetaL':dL_dthetaL}
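The posterior covariance used above, Sigma = diag(beta**-2) - A^{-1}/(beta beta^T) with A = I + BKB, is the Opper & Archambeau form of (K^{-1} + diag(beta**2))^{-1}. A short numpy check of that identity with a toy K and beta (not GPy code):

import numpy as np

n = 5
L = np.random.randn(n, n)
K = L.dot(L.T) + n * np.eye(n)            # toy positive-definite "kernel" matrix
beta = np.abs(np.random.randn(n)) + 0.5   # toy variational parameter

A = np.eye(n) + (K * beta[:, None]) * beta[None, :]           # I + B K B
Ai = np.linalg.inv(A)
Sigma = np.diag(beta ** -2) - Ai / beta[:, None] / beta[None, :]

# same matrix computed directly from the standard form
Sigma_direct = np.linalg.inv(np.linalg.inv(K) + np.diag(beta ** 2))
assert np.allclose(Sigma, Sigma_direct)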
@ -1 +1,2 @@
|
|||
from .hmc import HMC
|
||||
from .samplers import *
|
||||
|
|
|
|||
|
|
@ -1,14 +1,10 @@
|
|||
# ## Copyright (c) 2014, Zhenwen Dai
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
from scipy import linalg, optimize
|
||||
import Tango
|
||||
import sys
|
||||
import re
|
||||
import numdifftools as ndt
|
||||
import pdb
|
||||
|
||||
|
||||
try:
|
||||
#In Python 2, cPickle is faster. It does not exist in Python 3 but the underlying code is always used
|
||||
|
|
@ -22,11 +18,11 @@ class Metropolis_Hastings:
|
|||
def __init__(self,model,cov=None):
|
||||
"""Metropolis Hastings, with tunings according to Gelman et al. """
|
||||
self.model = model
|
||||
current = self.model._get_params_transformed()
|
||||
current = self.model.optimizer_array
|
||||
self.D = current.size
|
||||
self.chains = []
|
||||
if cov is None:
|
||||
self.cov = model.Laplace_covariance()
|
||||
self.cov = np.eye(self.D)
|
||||
else:
|
||||
self.cov = cov
|
||||
self.scale = 2.4/np.sqrt(self.D)
|
||||
|
|
@ -37,20 +33,20 @@ class Metropolis_Hastings:
|
|||
if start is None:
|
||||
self.model.randomize()
|
||||
else:
|
||||
self.model._set_params_transformed(start)
|
||||
self.model.optimizer_array = start
|
||||
|
||||
|
||||
|
||||
def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400):
|
||||
current = self.model._get_params_transformed()
|
||||
fcurrent = self.model.log_likelihood() + self.model.log_prior()
|
||||
def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400):
|
||||
current = self.model.optimizer_array
|
||||
fcurrent = self.model.log_likelihood() + self.model.log_prior() + \
|
||||
self.model._log_det_jacobian()
|
||||
accepted = np.zeros(Ntotal,dtype=np.bool)
|
||||
for it in range(Ntotal):
|
||||
print("sample %d of %d\r"%(it,Ntotal), end=' ')
|
||||
print("sample %d of %d\r"%(it,Ntotal),end="\t")
|
||||
sys.stdout.flush()
|
||||
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
|
||||
self.model._set_params_transformed(prop)
|
||||
fprop = self.model.log_likelihood() + self.model.log_prior()
|
||||
self.model.optimizer_array = prop
|
||||
fprop = self.model.log_likelihood() + self.model.log_prior() + \
|
||||
self.model._log_det_jacobian()
|
||||
|
||||
if fprop>fcurrent:#sample accepted, going 'uphill'
|
||||
accepted[it] = True
|
||||
|
|
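For orientation, the accept/reject rule used in this sampler (propose from a scaled multivariate normal, always accept uphill moves, accept downhill moves with probability exp(fprop - fcurrent)) looks roughly like the following numpy sketch on a toy 2-D Gaussian target. The 2.4/sqrt(D) proposal scale is the Gelman et al. tuning mentioned in the constructor; everything else here is illustrative, not GPy code.

import numpy as np

def log_target(x):
    return -0.5 * np.sum(x ** 2)          # toy log-density (standard normal)

D = 2
scale = 2.4 / np.sqrt(D)                  # Gelman et al. default scaling
cov = np.eye(D)
current = np.zeros(D)
fcurrent = log_target(current)
chain = []

for it in range(1000):
    prop = np.random.multivariate_normal(current, cov * scale * scale)
    fprop = log_target(prop)
    # accept uphill moves always, downhill moves with probability exp(fprop - fcurrent)
    if fprop > fcurrent or np.random.rand() < np.exp(fprop - fcurrent):
        current, fcurrent = prop, fprop
    chain.append(current.copy())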
@ -78,10 +74,11 @@ class Metropolis_Hastings:
|
|||
|
||||
def predict(self,function,args):
|
||||
"""Make a prediction for the function, to which we will pass the additional arguments"""
|
||||
param = self.model._get_params()
|
||||
param = self.model.param_array
|
||||
fs = []
|
||||
for p in self.chain:
|
||||
self.model._set_params(p)
|
||||
self.model.param_array = p
|
||||
fs.append(function(*args))
|
||||
self.model._set_params(param)# reset model to starting state
|
||||
# reset model to starting state
|
||||
self.model.param_array = param
|
||||
return fs
|
||||
|
|
|
|||
|
|
@ -38,16 +38,17 @@ class SparseGPMissing(StochasticStorage):
|
|||
import numpy as np
|
||||
self.Y = model.Y_normalized
|
||||
bdict = {}
|
||||
#For N > 1000 array2string default crops
|
||||
opt = np.get_printoptions()
|
||||
np.set_printoptions(threshold=np.inf)
|
||||
for d in range(self.Y.shape[1]):
|
||||
inan = np.isnan(self.Y[:, d])
|
||||
arr_str = np.array2string(inan,
|
||||
np.inf, 0,
|
||||
True, '',
|
||||
formatter={'bool':lambda x: '1' if x else '0'})
|
||||
inan = np.isnan(self.Y)[:, d]
|
||||
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
|
||||
try:
|
||||
bdict[arr_str][0].append(d)
|
||||
except:
|
||||
bdict[arr_str] = [[d], ~inan]
|
||||
np.set_printoptions(**opt)
|
||||
self.d = bdict.values()
|
||||
|
||||
class SparseGPStochastics(StochasticStorage):
|
||||
|
|
@ -55,32 +56,36 @@ class SparseGPStochastics(StochasticStorage):
|
|||
For the sparse gp we need to store the dimension we are in,
|
||||
and the indices corresponding to those
|
||||
"""
|
||||
def __init__(self, model, batchsize=1):
|
||||
def __init__(self, model, batchsize=1, missing_data=True):
|
||||
self.batchsize = batchsize
|
||||
self.output_dim = model.Y.shape[1]
|
||||
self.Y = model.Y_normalized
|
||||
self.missing_data = missing_data
|
||||
self.reset()
|
||||
self.do_stochastics()
|
||||
|
||||
def do_stochastics(self):
|
||||
import numpy as np
|
||||
if self.batchsize == 1:
|
||||
self.current_dim = (self.current_dim+1)%self.output_dim
|
||||
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
|
||||
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
|
||||
else:
|
||||
import numpy as np
|
||||
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
|
||||
bdict = {}
|
||||
for d in self.d:
|
||||
inan = np.isnan(self.Y[:, d])
|
||||
arr_str = int(np.array2string(inan,
|
||||
np.inf, 0,
|
||||
True, '',
|
||||
formatter={'bool':lambda x: '1' if x else '0'}), 2)
|
||||
try:
|
||||
bdict[arr_str][0].append(d)
|
||||
except:
|
||||
bdict[arr_str] = [[d], ~inan]
|
||||
self.d = bdict.values()
|
||||
if self.missing_data:
|
||||
opt = np.get_printoptions()
|
||||
np.set_printoptions(threshold=np.inf)
|
||||
for d in self.d:
|
||||
inan = np.isnan(self.Y[:, d])
|
||||
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
|
||||
try:
|
||||
bdict[arr_str][0].append(d)
|
||||
except:
|
||||
bdict[arr_str] = [[d], ~inan]
|
||||
np.set_printoptions(**opt)
|
||||
self.d = bdict.values()
|
||||
else:
|
||||
self.d = [[self.d, None]]
|
||||
|
||||
def reset(self):
|
||||
self.current_dim = -1
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from ._src.brownian import Brownian
|
|||
from ._src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
||||
from ._src.mlp import MLP
|
||||
from ._src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
||||
from ._src.standard_periodic import StdPeriodic
|
||||
from ._src.independent_outputs import IndependentOutputs, Hierarchical
|
||||
from ._src.coregionalize import Coregionalize
|
||||
from ._src.ODE_UY import ODE_UY
|
||||
|
|
@ -17,7 +18,7 @@ from ._src.eq_ode2 import EQ_ODE2
|
|||
from ._src.trunclinear import TruncLinear,TruncLinear_inf
|
||||
from ._src.splitKern import SplitKern,DEtime
|
||||
from ._src.splitKern import DEtime as DiffGenomeKern
|
||||
|
||||
|
||||
from ._src.spline import Spline
|
||||
from ._src.eq_ode2 import EQ_ODE2
|
||||
from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ class Add(CombinationKernel):
|
|||
|
||||
This kernel will take over the active dims of it's subkernels passed in.
|
||||
"""
|
||||
def __init__(self, subkerns, name='add'):
|
||||
def __init__(self, subkerns, name='sum'):
|
||||
for i, kern in enumerate(subkerns[:]):
|
||||
if isinstance(kern, Add):
|
||||
del subkerns[i]
|
||||
|
|
@ -72,15 +72,28 @@ class Add(CombinationKernel):
|
|||
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
|
||||
return target
|
||||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def gradients_XX(self, dL_dK, X, X2):
|
||||
if X2 is None:
|
||||
target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
|
||||
else:
|
||||
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
|
||||
[target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
|
||||
return target
|
||||
|
||||
def gradients_XX_diag(self, dL_dKdiag, X):
|
||||
target = np.zeros(X.shape)
|
||||
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
|
||||
return target
|
||||
|
||||
@Cache_this(limit=1, force_kwargs=['which_parts'])
|
||||
def psi0(self, Z, variational_posterior):
|
||||
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
|
||||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
@Cache_this(limit=1, force_kwargs=['which_parts'])
|
||||
def psi1(self, Z, variational_posterior):
|
||||
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
|
||||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
@Cache_this(limit=1, force_kwargs=['which_parts'])
|
||||
def psi2(self, Z, variational_posterior):
|
||||
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
|
||||
#return psi2
|
||||
|
|
@ -115,6 +128,41 @@ class Add(CombinationKernel):
|
|||
raise NotImplementedError("psi2 cannot be computed for this kernel")
|
||||
return psi2
|
||||
|
||||
@Cache_this(limit=1, force_kwargs=['which_parts'])
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts))
|
||||
#return psi2
|
||||
# compute the "cross" terms
|
||||
from .static import White, Bias
|
||||
from .rbf import RBF
|
||||
#from rbf_inv import RBFInv
|
||||
from .linear import Linear
|
||||
#from fixed import Fixed
|
||||
|
||||
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||
# i1, i2 = p1.active_dims, p2.active_dims
|
||||
# white doesn't combine with anything
|
||||
if isinstance(p1, White) or isinstance(p2, White):
|
||||
pass
|
||||
# rbf X bias
|
||||
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
|
||||
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
|
||||
tmp = p2.psi1(Z, variational_posterior).sum(axis=0)
|
||||
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
|
||||
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
|
||||
tmp = p1.psi1(Z, variational_posterior).sum(axis=0)
|
||||
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
|
||||
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
|
||||
tmp1 = p1.psi1(Z, variational_posterior)
|
||||
tmp2 = p2.psi1(Z, variational_posterior)
|
||||
psi2 += np.einsum('nm,no->nmo',tmp1,tmp2)+np.einsum('nm,no->nmo',tmp2,tmp1)
|
||||
#(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
|
||||
else:
|
||||
raise NotImplementedError("psi2 cannot be computed for this kernel")
|
||||
return psi2
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from .static import White, Bias
|
||||
for p1 in self.parts:
|
||||
|
|
@ -126,9 +174,9 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
|
||||
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
|
|
@ -143,9 +191,9 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
return target
|
||||
|
||||
|
|
@ -161,9 +209,9 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
|
||||
return target_grads
|
||||
|
|
|
|||
|
|
@ -6,7 +6,11 @@ import numpy as np
|
|||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
from ...util.config import config # for assessing whether to use cython
|
||||
from . import coregionalize_cython
|
||||
try:
|
||||
from . import coregionalize_cython
|
||||
config.set('cython', 'working', 'True')
|
||||
except ImportError:
|
||||
config.set('cython', 'working', 'False')
|
||||
|
||||
class Coregionalize(Kern):
|
||||
"""
|
||||
|
|
@ -94,7 +98,7 @@ class Coregionalize(Kern):
|
|||
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
|
||||
|
||||
|
||||
dkappa = np.diag(dL_dK_small)
|
||||
dkappa = np.diag(dL_dK_small).copy()
|
||||
dL_dK_small += dL_dK_small.T
|
||||
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
|
||||
|
||||
|
|
@ -111,7 +115,7 @@ class Coregionalize(Kern):
|
|||
return dL_dK_small
|
||||
|
||||
def _gradient_reduce_cython(self, dL_dK, index, index2):
|
||||
index, index2 = index[:,0], index2[:,0]
|
||||
index, index2 = np.int64(index[:,0]), np.int64(index2[:,0])
|
||||
return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
|
||||
|
||||
|
||||
|
|
@ -126,4 +130,3 @@ class Coregionalize(Kern):
|
|||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
return np.zeros(X.shape)
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
|
|
@ -1,33 +1,37 @@
|
|||
#cython: boundscheck=True
|
||||
#cython: wraparound=True
|
||||
#cython: boundscheck=False
|
||||
#cython: wraparound=False
|
||||
#cython: nonecheck=False
|
||||
import cython
|
||||
import numpy as np
|
||||
cimport numpy as np
|
||||
|
||||
def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
|
||||
cdef int N = X.size
|
||||
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
|
||||
for n in range(N):
|
||||
for m in range(N):
|
||||
K[n,m] = B[X[n],X[m]]
|
||||
cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, N))
|
||||
with nogil:
|
||||
for n in range(N):
|
||||
for m in range(N):
|
||||
K[n, m] = B[X[n], X[m]]
|
||||
return K
|
||||
|
||||
def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
|
||||
cdef int N = X.size
|
||||
cdef int M = X2.size
|
||||
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
|
||||
for n in range(N):
|
||||
for m in range(M):
|
||||
K[n,m] = B[X[n],X2[m]]
|
||||
cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, M))
|
||||
with nogil:
|
||||
for n in range(N):
|
||||
for m in range(M):
|
||||
K[n, m] = B[X[n], X2[m]]
|
||||
return K
|
||||
|
||||
def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
|
||||
cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
|
||||
cdef np.ndarray[np.double_t, ndim=2, mode='c'] dL_dK_small = np.zeros((D, D))
|
||||
cdef int N = index.size
|
||||
cdef int M = index2.size
|
||||
for i in range(N):
|
||||
for j in range(M):
|
||||
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
|
||||
with nogil:
|
||||
for i in range(N):
|
||||
for j in range(M):
|
||||
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
|
||||
return dL_dK_small
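
For reference, a rough numpy-only equivalent of what these Cython helpers compute; this is an illustrative sketch, not part of this changeset, and the function names are made up:

import numpy as np

def K_asymmetric_numpy(B, X, X2):
    # Illustrative sketch: K[n, m] = B[X[n], X2[m]], the same gather the Cython loop performs
    return B[np.ix_(X, X2)]

def gradient_reduce_numpy(D, dL_dK, index, index2):
    # Illustrative sketch: dL_dK_small[index2[j], index[i]] += dL_dK[i, j], accumulated over all (i, j)
    dL_dK_small = np.zeros((D, D))
    np.add.at(dL_dK_small, (index2[None, :], index[:, None]), dL_dK)
    return dL_dK_small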
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ class IndependentOutputs(CombinationKernel):
|
|||
if X2 is None:
|
||||
# TODO: make use of index_to_slices
|
||||
# FIXME: Broken as X is already sliced out
|
||||
print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
|
||||
# print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
|
||||
values = np.unique(X[:,self.index_dim])
|
||||
slices = [X[:,self.index_dim]==i for i in values]
|
||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
||||
|
|
|
|||
|
|
@ -58,20 +58,9 @@ class Kern(Parameterized):
|
|||
|
||||
self._sliced_X = 0
|
||||
self.useGPU = self._support_GPU and useGPU
|
||||
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
|
||||
|
||||
@property
|
||||
def return_psi2_n(self):
|
||||
"""
|
||||
Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
|
||||
"""
|
||||
return self._return_psi2_n_flag[0]
|
||||
@return_psi2_n.setter
|
||||
def return_psi2_n(self, val):
|
||||
def visit(self):
|
||||
if isinstance(self, Kern):
|
||||
self._return_psi2_n_flag[0]=val
|
||||
self.traverse(visit)
|
||||
from .psi_comp import PSICOMP_GH
|
||||
self.psicomp = PSICOMP_GH()
|
||||
|
||||
@Cache_this(limit=20)
|
||||
def _slice_X(self, X):
|
||||
|
|
@ -81,6 +70,9 @@ class Kern(Parameterized):
|
|||
"""
|
||||
Compute the kernel function.
|
||||
|
||||
.. math::
|
||||
K_{ij} = k(X_i, X_j)
|
||||
|
||||
:param X: the first set of inputs to the kernel
|
||||
:param X2: (optional) the second set of arguments to the kernel. If X2
|
||||
is None, this is passed through to the 'part' object, which
|
||||
|
|
@ -88,16 +80,64 @@ class Kern(Parameterized):
|
|||
"""
|
||||
raise NotImplementedError
|
||||
def Kdiag(self, X):
|
||||
"""
|
||||
The diagonal of the kernel matrix K
|
||||
|
||||
.. math::
|
||||
Kdiag_{i} = k(X_i, X_i)
|
||||
"""
|
||||
raise NotImplementedError
|
||||
def psi0(self, Z, variational_posterior):
|
||||
raise NotImplementedError
|
||||
"""
|
||||
.. math::
|
||||
\psi_0 = \sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
|
||||
"""
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
|
||||
def psi1(self, Z, variational_posterior):
|
||||
raise NotImplementedError
|
||||
"""
|
||||
.. math::
|
||||
\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
|
||||
"""
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
|
||||
def psi2(self, Z, variational_posterior):
|
||||
raise NotImplementedError
|
||||
"""
|
||||
.. math::
|
||||
\psi_2^{m,m'} = \sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
|
||||
"""
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
"""
|
||||
.. math::
|
||||
\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
|
||||
|
||||
Thus, we do not sum out n, compared to psi2
|
||||
"""
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
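
As an aside, the psi statistics defined above can be sanity-checked by simple Monte Carlo sampling from q(X); the sketch below is illustrative only (not part of this changeset) and assumes a factorised Gaussian posterior with means mu and variances S:

import numpy as np

def psi_stats_mc(kern, Z, mu, S, samples=1000):
    # Illustrative sketch: Monte-Carlo estimates of psi0, psi1, psi2 under q(X) = N(mu, S)
    N, Q = mu.shape
    psi0 = np.zeros(N)
    psi1 = np.zeros((N, Z.shape[0]))
    psi2 = np.zeros((Z.shape[0], Z.shape[0]))
    for _ in range(samples):
        X = mu + np.sqrt(S) * np.random.randn(N, Q)
        Kfu = kern.K(X, Z)
        psi0 += kern.Kdiag(X)
        psi1 += Kfu
        psi2 += Kfu.T.dot(Kfu)  # sums over n, matching psi2 (keep per-point outer products for psi2n)
    return psi0 / samples, psi1 / samples, psi2 / samples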
|
||||
def gradients_X(self, dL_dK, X, X2):
|
||||
"""
|
||||
.. math::
|
||||
|
||||
\\frac{\partial L}{\partial X} = \\frac{\partial L}{\partial K}\\frac{\partial K}{\partial X}
|
||||
"""
|
||||
raise NotImplementedError
|
||||
def gradients_X_X2(self, dL_dK, X, X2):
|
||||
return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
|
||||
def gradients_XX(self, dL_dK, X, X2):
|
||||
"""
|
||||
.. math::
|
||||
|
||||
\\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
|
||||
"""
|
||||
raise NotImplementedError("This is the second derivative of K wrt X and X2, and not implemented for this kernel")
|
||||
def gradients_XX_diag(self, dL_dKdiag, X):
|
||||
"""
|
||||
The diagonal of the second derivative w.r.t. X and X2
|
||||
"""
|
||||
raise NotImplementedError("This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
"""
|
||||
The diagonal of the derivative w.r.t. X
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
|
|
@ -113,27 +153,35 @@ class Kern(Parameterized):
|
|||
Set the gradients of all parameters when doing inference with
|
||||
uncertain inputs, using expectations of the kernel.
|
||||
|
||||
The esential maths is
|
||||
The essential maths is
|
||||
|
||||
dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
|
||||
dL_dpsi1 * dpsi1_d{theta_i} +
|
||||
dL_dpsi2 * dpsi2_d{theta_i}
|
||||
.. math::
|
||||
|
||||
\\frac{\partial L}{\partial \\theta_i} & = \\frac{\partial L}{\partial \psi_0}\\frac{\partial \psi_0}{\partial \\theta_i}\\
|
||||
& \quad + \\frac{\partial L}{\partial \psi_1}\\frac{\partial \psi_1}{\partial \\theta_i}\\
|
||||
& \quad + \\frac{\partial L}{\partial \psi_2}\\frac{\partial \psi_2}{\partial \\theta_i}
|
||||
|
||||
Thus, we push the different derivatives through the gradients of the psi
|
||||
statistics. Be sure to set the gradients for all kernel
|
||||
parameters here.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
|
||||
self.gradient[:] = dtheta
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||
psi0=None, psi1=None, psi2=None):
|
||||
"""
|
||||
Returns the derivative of the objective wrt Z, using the chain rule
|
||||
through the expectation variables.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
"""
|
||||
Compute the gradients wrt the parameters of the variational
|
||||
distruibution q(X), chain-ruling via the expectations of the kernel
|
||||
"""
|
||||
raise NotImplementedError
|
||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
|
||||
|
||||
def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
|
||||
"""
|
||||
|
|
@ -172,7 +220,7 @@ class Kern(Parameterized):
|
|||
def __iadd__(self, other):
|
||||
return self.add(other)
|
||||
|
||||
def add(self, other, name='add'):
|
||||
def add(self, other, name='sum'):
|
||||
"""
|
||||
Add another kernel to this one.
|
||||
|
||||
|
|
@ -208,8 +256,6 @@ class Kern(Parameterized):
|
|||
|
||||
:param other: the other kernel to be added
|
||||
:type other: GPy.kern
|
||||
:param tensor: whether or not to use the tensor space (default is false).
|
||||
:type tensor: bool
|
||||
|
||||
"""
|
||||
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
'''
|
||||
Created on 11 Mar 2014
|
||||
|
||||
@author: maxz
|
||||
@author: @mzwiessele
|
||||
|
||||
This module provides a meta class for the kernels. The meta class is for
|
||||
slicing the inputs (X, X2) for the kernels, before K (or any other method involving X)
|
||||
gets called. The `active_dims` of a kernel decide which dimensions the kernel works on.
|
||||
'''
|
||||
from ...core.parameterization.parameterized import ParametersChangedMeta
|
||||
import numpy as np
|
||||
|
|
@ -19,20 +23,27 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
|
|||
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
|
||||
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
|
||||
put_clean(dct, 'gradients_X', _slice_gradients_X)
|
||||
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
|
||||
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
|
||||
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
|
||||
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
|
||||
|
||||
put_clean(dct, 'psi0', _slice_psi)
|
||||
put_clean(dct, 'psi1', _slice_psi)
|
||||
put_clean(dct, 'psi2', _slice_psi)
|
||||
put_clean(dct, 'psi2n', _slice_psi)
|
||||
put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
|
||||
put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
|
||||
put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
|
||||
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
|
||||
|
||||
class _Slice_wrap(object):
|
||||
def __init__(self, k, X, X2=None):
|
||||
def __init__(self, k, X, X2=None, ret_shape=None):
|
||||
self.k = k
|
||||
self.shape = X.shape
|
||||
if ret_shape is None:
|
||||
self.shape = X.shape
|
||||
else:
|
||||
self.shape = ret_shape
|
||||
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
|
||||
if X2 is not None:
|
||||
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
|
||||
|
|
@ -54,7 +65,10 @@ class _Slice_wrap(object):
|
|||
def handle_return_array(self, return_val):
|
||||
if self.ret:
|
||||
ret = np.zeros(self.shape)
|
||||
ret[:, self.k.active_dims] = return_val
|
||||
if len(self.shape) == 2:
|
||||
ret[:, self.k.active_dims] = return_val
|
||||
elif len(self.shape) == 3:
|
||||
ret[:, :, self.k.active_dims] = return_val
|
||||
return ret
|
||||
return return_val
|
||||
|
||||
|
|
@ -98,6 +112,19 @@ def _slice_gradients_X(f):
|
|||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_XX(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dK, X, X2=None):
|
||||
if X2 is None:
|
||||
N, M = X.shape[0], X.shape[0]
|
||||
else:
|
||||
N, M = X.shape[0], X2.shape[0]
|
||||
with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
|
||||
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
|
||||
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
|
||||
return ret
|
||||
return wrap
|
||||
|
||||
def _slice_gradients_X_diag(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dKdiag, X):
|
||||
|
|
@ -124,7 +151,8 @@ def _slice_update_gradients_expectations(f):
|
|||
|
||||
def _slice_gradients_Z_expectations(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||
with _Slice_wrap(self, Z, variational_posterior) as s:
|
||||
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
|
||||
return ret
|
||||
|
|
@ -132,7 +160,8 @@ def _slice_gradients_Z_expectations(f):
|
|||
|
||||
def _slice_gradients_qX_expectations(f):
|
||||
@wraps(f)
|
||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
|
||||
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
|
||||
with _Slice_wrap(self, variational_posterior, Z) as s:
|
||||
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
|
||||
r2 = ret[:2]
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ class Linear(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
|
||||
k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i x_iy_i
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
|
|
@ -100,6 +100,12 @@ class Linear(Kern):
|
|||
#return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||
return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
|
||||
|
||||
def gradients_XX(self, dL_dK, X, X2=None):
|
||||
if X2 is None:
|
||||
return 2*np.ones(X.shape)*self.variances
|
||||
else:
|
||||
return np.ones(X.shape)*self.variances
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
return 2.*self.variances*dL_dKdiag[:,None]*X
|
||||
|
||||
|
|
@ -111,26 +117,29 @@ class Linear(Kern):
|
|||
#---------------------------------------#
|
||||
|
||||
def psi0(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[0]
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
|
||||
|
||||
def psi1(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[1]
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
|
||||
|
||||
def psi2(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[2]
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[2]
|
||||
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
dL_dvar = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[0]
|
||||
dL_dvar = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
|
||||
if self.ARD:
|
||||
self.variances.gradient = dL_dvar
|
||||
else:
|
||||
self.variances.gradient = dL_dvar.sum()
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[1]
|
||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[2:]
|
||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
|
||||
|
||||
class LinearFull(Kern):
|
||||
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ from .kern import Kern
|
|||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
import numpy as np
|
||||
from ...util.linalg import tdot
|
||||
from ...util.caching import Cache_this
|
||||
four_over_tau = 2./np.pi
|
||||
|
||||
class MLP(Kern):
|
||||
|
|
@ -31,105 +33,116 @@ class MLP(Kern):
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., active_dims=None, name='mlp'):
|
||||
def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=1., ARD=False, active_dims=None, name='mlp'):
|
||||
super(MLP, self).__init__(input_dim, active_dims, name)
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
self.ARD= ARD
|
||||
if ARD:
|
||||
wv = np.empty((input_dim,))
|
||||
wv[:] = weight_variance
|
||||
weight_variance = wv
|
||||
self.weight_variance = Param('weight_variance', weight_variance, Logexp())
|
||||
self.bias_variance = Param('bias_variance', bias_variance, Logexp())
|
||||
self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
|
||||
|
||||
|
||||
@Cache_this(limit=20, ignore_args=())
|
||||
def K(self, X, X2=None):
|
||||
self._K_computations(X, X2)
|
||||
return self.variance*self._K_dvar
|
||||
if X2 is None:
|
||||
X_denom = np.sqrt(self._comp_prod(X)+1.)
|
||||
X2_denom = X_denom
|
||||
X2 = X
|
||||
else:
|
||||
X_denom = np.sqrt(self._comp_prod(X)+1.)
|
||||
X2_denom = np.sqrt(self._comp_prod(X2)+1.)
|
||||
XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:]
|
||||
return self.variance*four_over_tau*np.arcsin(XTX)
|
||||
|
||||
@Cache_this(limit=20, ignore_args=())
|
||||
def Kdiag(self, X):
|
||||
"""Compute the diagonal of the covariance matrix for X."""
|
||||
self._K_diag_computations(X)
|
||||
return self.variance*self._K_diag_dvar
|
||||
X_prod = self._comp_prod(X)
|
||||
return self.variance*four_over_tau*np.arcsin(X_prod/(X_prod+1.))
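
The closed form implemented by K and Kdiag above is the arcsin (MLP / neural-network) covariance; a compact numpy restatement for the non-ARD case is sketched below, illustrative only and not part of this changeset:

import numpy as np

def mlp_cov(X, X2, variance=1., weight_variance=1., bias_variance=1.):
    # Illustrative sketch:
    # k(x, x') = var * (2/pi) * arcsin( (w x.x' + b) / sqrt((w x.x + b + 1)(w x'.x' + b + 1)) )
    # note that four_over_tau above equals 2/pi, since tau = 2*pi
    prod = lambda A, B: weight_variance * A.dot(B.T) + bias_variance
    denom = np.sqrt(np.diag(prod(X, X)) + 1.)[:, None] * np.sqrt(np.diag(prod(X2, X2)) + 1.)[None, :]
    return variance * (2. / np.pi) * np.arcsin(prod(X, X2) / denom)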
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
self._K_computations(X, X2)
|
||||
self.variance.gradient = np.sum(self._K_dvar*dL_dK)
|
||||
|
||||
denom3 = self._K_denom**3
|
||||
base = four_over_tau*self.variance/np.sqrt(1-self._K_asin_arg*self._K_asin_arg)
|
||||
base_cov_grad = base*dL_dK
|
||||
|
||||
if X2 is None:
|
||||
vec = np.diag(self._K_inner_prod)
|
||||
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
|
||||
-.5*self._K_numer/denom3
|
||||
*(np.outer((self.weight_variance*vec+self.bias_variance+1.), vec)
|
||||
+np.outer(vec,(self.weight_variance*vec+self.bias_variance+1.))))*base_cov_grad).sum()
|
||||
self.bias_variance.gradient = ((1./self._K_denom
|
||||
-.5*self._K_numer/denom3
|
||||
*((vec[None, :]+vec[:, None])*self.weight_variance
|
||||
+2.*self.bias_variance + 2.))*base_cov_grad).sum()
|
||||
else:
|
||||
vec1 = (X*X).sum(1)
|
||||
vec2 = (X2*X2).sum(1)
|
||||
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
|
||||
-.5*self._K_numer/denom3
|
||||
*(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
|
||||
self.bias_variance.gradient = ((1./self._K_denom
|
||||
-.5*self._K_numer/denom3
|
||||
*((vec1[:, None]+vec2[None, :])*self.weight_variance
|
||||
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
|
||||
dvar, dw, db = self._comp_grads(dL_dK, X, X2)[:3]
|
||||
self.variance.gradient = dvar
|
||||
self.weight_variance.gradient = dw
|
||||
self.bias_variance.gradient = db
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
self._K_diag_computations(X)
|
||||
self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)
|
||||
|
||||
base = four_over_tau*self.variance/np.sqrt(1-self._K_diag_asin_arg*self._K_diag_asin_arg)
|
||||
base_cov_grad = base*dL_dKdiag/np.square(self._K_diag_denom)
|
||||
|
||||
self.weight_variance.gradient = (base_cov_grad*np.square(X).sum(axis=1)).sum()
|
||||
self.bias_variance.gradient = base_cov_grad.sum()
|
||||
dvar, dw, db = self._comp_grads_diag(dL_dKdiag, X)[:3]
|
||||
self.variance.gradient = dvar
|
||||
self.weight_variance.gradient = dw
|
||||
self.bias_variance.gradient = db
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2):
|
||||
"""Derivative of the covariance matrix with respect to X"""
|
||||
self._K_computations(X, X2)
|
||||
arg = self._K_asin_arg
|
||||
numer = self._K_numer
|
||||
denom = self._K_denom
|
||||
denom3 = denom*denom*denom
|
||||
if X2 is not None:
|
||||
vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
|
||||
return four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
|
||||
else:
|
||||
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
|
||||
return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
|
||||
return self._comp_grads(dL_dK, X, X2)[3]
|
||||
|
||||
def gradients_X_X2(self, dL_dK, X, X2):
|
||||
"""Derivative of the covariance matrix with respect to X"""
|
||||
return self._comp_grads(dL_dK, X, X2)[3:]
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
"""Gradient of diagonal of covariance with respect to X"""
|
||||
self._K_diag_computations(X)
|
||||
arg = self._K_diag_asin_arg
|
||||
denom = self._K_diag_denom
|
||||
#numer = self._K_diag_numer
|
||||
return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
|
||||
return self._comp_grads_diag(dL_dKdiag, X)[3]
|
||||
|
||||
|
||||
def _K_computations(self, X, X2):
|
||||
"""Pre-computations for the covariance matrix (used for computing the covariance and its gradients."""
|
||||
@Cache_this(limit=50, ignore_args=())
|
||||
def _comp_prod(self, X, X2=None):
|
||||
if X2 is None:
|
||||
self._K_inner_prod = np.dot(X,X.T)
|
||||
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
|
||||
vec = np.diag(self._K_numer) + 1.
|
||||
self._K_denom = np.sqrt(np.outer(vec,vec))
|
||||
return (np.square(X)*self.weight_variance).sum(axis=1)+self.bias_variance
|
||||
else:
|
||||
self._K_inner_prod = np.dot(X,X2.T)
|
||||
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
|
||||
vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
|
||||
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
|
||||
self._K_denom = np.sqrt(np.outer(vec1,vec2))
|
||||
self._K_asin_arg = self._K_numer/self._K_denom
|
||||
self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
|
||||
return (X*self.weight_variance).dot(X2.T)+self.bias_variance
|
||||
|
||||
def _K_diag_computations(self, X):
|
||||
"""Pre-computations concerning the diagonal terms (used for computation of diagonal and its gradients)."""
|
||||
self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
|
||||
self._K_diag_denom = self._K_diag_numer+1.
|
||||
self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
|
||||
self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)
|
||||
@Cache_this(limit=20, ignore_args=(1,))
|
||||
def _comp_grads(self, dL_dK, X, X2=None):
|
||||
var,w,b = self.variance, self.weight_variance, self.bias_variance
|
||||
K = self.K(X, X2)
|
||||
dvar = (dL_dK*K).sum()/var
|
||||
X_prod = self._comp_prod(X)
|
||||
X2_prod = self._comp_prod(X2) if X2 is not None else X_prod
|
||||
XTX = self._comp_prod(X,X2) if X2 is not None else self._comp_prod(X, X)
|
||||
common = var*four_over_tau/np.sqrt((X_prod[:,None]+1.)*(X2_prod[None,:]+1.)-np.square(XTX))*dL_dK
|
||||
if self.ARD:
|
||||
if X2 is not None:
|
||||
XX2 = X[:,None,:]*X2[None,:,:] if X2 is not None else X[:,None,:]*X[None,:,:]
|
||||
XX = np.square(X)
|
||||
X2X2 = np.square(X2)
|
||||
Q = self.weight_variance.shape[0]
|
||||
common_XTX = common*XTX
|
||||
dw = np.dot(common.flat,XX2.reshape(-1,Q)) -( (common_XTX.sum(1)/(X_prod+1.)).T.dot(XX)+(common_XTX.sum(0)/(X2_prod+1.)).dot(X2X2))/2
|
||||
else:
|
||||
XX2 = X[:,None,:]*X[None,:,:]
|
||||
XX = np.square(X)
|
||||
Q = self.weight_variance.shape[0]
|
||||
common_XTX = common*XTX
|
||||
dw = np.dot(common.flat,XX2.reshape(-1,Q)) - ((common_XTX.sum(0)+common_XTX.sum(1))/(X_prod+1.)).dot(XX)/2
|
||||
else:
|
||||
dw = (common*((XTX-b)/w-XTX*(((X_prod-b)/(w*(X_prod+1.)))[:,None]+((X2_prod-b)/(w*(X2_prod+1.)))[None,:])/2.)).sum()
|
||||
db = (common*(1.-XTX*(1./(X_prod[:,None]+1.)+1./(X2_prod[None,:]+1.))/2.)).sum()
|
||||
if X2 is None:
|
||||
common = common+common.T
|
||||
dX = common.dot(X)*w-((common*XTX).sum(axis=1)/(X_prod+1.))[:,None]*X*w
|
||||
dX2 = dX
|
||||
else:
|
||||
dX = common.dot(X2)*w-((common*XTX).sum(axis=1)/(X_prod+1.))[:,None]*X*w
|
||||
dX2 = common.T.dot(X)*w-((common*XTX).sum(axis=0)/(X2_prod+1.))[:,None]*X2*w
|
||||
return dvar, dw, db, dX, dX2
|
||||
|
||||
@Cache_this(limit=20, ignore_args=(1,))
|
||||
def _comp_grads_diag(self, dL_dKdiag, X):
|
||||
var,w,b = self.variance, self.weight_variance, self.bias_variance
|
||||
K = self.Kdiag(X)
|
||||
dvar = (dL_dKdiag*K).sum()/var
|
||||
X_prod = self._comp_prod(X)
|
||||
common = var*four_over_tau/(np.sqrt(1-np.square(X_prod/(X_prod+1)))*np.square(X_prod+1))*dL_dKdiag
|
||||
if self.ARD:
|
||||
XX = np.square(X)
|
||||
dw = np.dot(common,XX)
|
||||
else:
|
||||
dw = (common*(X_prod-b)).sum()/w
|
||||
db = common.sum()
|
||||
dX = common[:,None]*X*w*2
|
||||
return dvar, dw, db, dX
|
||||
|
|
|
|||
|
|
@ -27,8 +27,6 @@ class Prod(CombinationKernel):
|
|||
|
||||
:param k1, k2: the kernels to multiply
|
||||
:type k1, k2: Kern
|
||||
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
|
||||
:type tensor: Boolean
|
||||
:rtype: kernel object
|
||||
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -9,18 +9,34 @@ from . import ssrbf_psi_comp
|
|||
from . import sslinear_psi_comp
|
||||
from . import linear_psi_comp
|
||||
|
||||
class PSICOMP_RBF(Pickleable):
|
||||
@Cache_this(limit=2, ignore_args=(0,))
|
||||
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
|
||||
|
||||
class PSICOMP(Pickleable):
|
||||
|
||||
def psicomputations(self, kern, Z, qX, return_psi2_n=False):
|
||||
raise NotImplementedError("Abstract method!")
|
||||
|
||||
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
|
||||
raise NotImplementedError("Abstract method!")
|
||||
|
||||
def _setup_observers(self):
|
||||
pass
|
||||
|
||||
from .gaussherm import PSICOMP_GH
|
||||
|
||||
class PSICOMP_RBF(PSICOMP):
|
||||
@Cache_this(limit=5, ignore_args=(0,))
|
||||
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
|
||||
variance, lengthscale = kern.variance, kern.lengthscale
|
||||
if isinstance(variational_posterior, variational.NormalPosterior):
|
||||
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
|
||||
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=return_psi2_n)
|
||||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
raise ValueError("unknown distribution received for psi-statistics")
|
||||
|
||||
@Cache_this(limit=2, ignore_args=(0,1,2,3))
|
||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
@Cache_this(limit=5, ignore_args=(0,2,3,4))
|
||||
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
variance, lengthscale = kern.variance, kern.lengthscale
|
||||
if isinstance(variational_posterior, variational.NormalPosterior):
|
||||
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
|
||||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
|
|
@ -28,28 +44,26 @@ class PSICOMP_RBF(Pickleable):
|
|||
else:
|
||||
raise ValueError("unknown distribution received for psi-statistics")
|
||||
|
||||
def _setup_observers(self):
|
||||
pass
|
||||
class PSICOMP_Linear(PSICOMP):
|
||||
|
||||
class PSICOMP_Linear(Pickleable):
|
||||
|
||||
@Cache_this(limit=2, ignore_args=(0,))
|
||||
def psicomputations(self, variance, Z, variational_posterior):
|
||||
@Cache_this(limit=5, ignore_args=(0,))
|
||||
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
|
||||
variances = kern.variances
|
||||
if isinstance(variational_posterior, variational.NormalPosterior):
|
||||
return linear_psi_comp.psicomputations(variance, Z, variational_posterior)
|
||||
return linear_psi_comp.psicomputations(variances, Z, variational_posterior, return_psi2_n=return_psi2_n)
|
||||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
|
||||
return sslinear_psi_comp.psicomputations(variances, Z, variational_posterior)
|
||||
else:
|
||||
raise ValueError("unknown distriubtion received for psi-statistics")
|
||||
|
||||
@Cache_this(limit=2, ignore_args=(0,1,2,3))
|
||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
|
||||
@Cache_this(limit=2, ignore_args=(0,2,3,4))
|
||||
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
variances = kern.variances
|
||||
if isinstance(variational_posterior, variational.NormalPosterior):
|
||||
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
|
||||
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variances, Z, variational_posterior)
|
||||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
|
||||
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variances, Z, variational_posterior)
|
||||
else:
|
||||
raise ValueError("unknown distribution received for psi-statistics")
|
||||
|
||||
def _setup_observers(self):
|
||||
pass
|
||||
|
||||
|
|
|
|||
100
GPy/kern/_src/psi_comp/gaussherm.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
"""
|
||||
An approximate psi-statistics implementation based on Gauss-Hermite quadrature
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ....core.parameterization import Param
|
||||
from GPy.util.caching import Cache_this
|
||||
from ....util.linalg import tdot
|
||||
from . import PSICOMP
|
||||
|
||||
class PSICOMP_GH(PSICOMP):
|
||||
"""
|
||||
TODO: support Psi2 with shape NxMxM
|
||||
"""
|
||||
|
||||
def __init__(self, degree=5, cache_K=True):
|
||||
self.degree = degree
|
||||
self.cache_K = cache_K
|
||||
self.locs, self.weights = np.polynomial.hermite.hermgauss(degree)
|
||||
self.locs *= np.sqrt(2.)
|
||||
self.weights*= 1./np.sqrt(np.pi)
|
||||
self.Xs = None
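
The quadrature rule set up in this constructor is the standard rescaling of numpy's Hermite nodes to Gaussian expectations; a minimal one-dimensional sketch of the idea, illustrative only and not part of this changeset:

import numpy as np

def gauss_hermite_expectation(f, mu, s2, degree=5):
    # Illustrative sketch: E_{x ~ N(mu, s2)}[f(x)] ~= sum_i w_i f(sqrt(2*s2)*t_i + mu),
    # with t_i, w_i the hermgauss nodes and weights rescaled as in PSICOMP_GH above
    locs, weights = np.polynomial.hermite.hermgauss(degree)
    xs = np.sqrt(2. * s2) * locs + mu
    return np.sum(weights / np.sqrt(np.pi) * f(xs))

# e.g. E[x^2] for x ~ N(1, 0.25) is 1.25:
# print(gauss_hermite_expectation(lambda x: x**2, 1.0, 0.25))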
|
||||
|
||||
def _setup_observers(self):
|
||||
pass
|
||||
|
||||
@Cache_this(limit=10, ignore_args=(0,))
|
||||
def comp_K(self, Z, qX):
|
||||
if self.Xs is None or self.Xs.shape != qX.mean.shape:
|
||||
from ....core.parameterization import ObsAr
|
||||
self.Xs = ObsAr(np.empty((self.degree,)+qX.mean.shape))
|
||||
mu, S = qX.mean.values, qX.variance.values
|
||||
S_sq = np.sqrt(S)
|
||||
for i in xrange(self.degree):
|
||||
self.Xs[i] = self.locs[i]*S_sq+mu
|
||||
return self.Xs
|
||||
|
||||
@Cache_this(limit=10, ignore_args=(0,))
|
||||
def psicomputations(self, kern, Z, qX, return_psi2_n=False):
|
||||
mu, S = qX.mean.values, qX.variance.values
|
||||
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
|
||||
if self.cache_K: Xs = self.comp_K(Z, qX)
|
||||
else: S_sq = np.sqrt(S)
|
||||
|
||||
psi0 = np.zeros((N,))
|
||||
psi1 = np.zeros((N,M))
|
||||
psi2 = np.zeros((M,M))
|
||||
for i in xrange(self.degree):
|
||||
if self.cache_K:
|
||||
X = Xs[i]
|
||||
else:
|
||||
X = self.locs[i]*S_sq+mu
|
||||
psi0 += self.weights[i]* kern.Kdiag(X)
|
||||
Kfu = kern.K(X,Z)
|
||||
psi1 += self.weights[i]* Kfu
|
||||
psi2 += self.weights[i]* tdot(Kfu.T)
|
||||
return psi0, psi1, psi2
|
||||
|
||||
@Cache_this(limit=10, ignore_args=(0, 2,3,4))
|
||||
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
|
||||
mu, S = qX.mean.values, qX.variance.values
|
||||
if self.cache_K: Xs = self.comp_K(Z, qX)
|
||||
S_sq = np.sqrt(S)
|
||||
|
||||
dtheta_old = kern.gradient.copy()
|
||||
dtheta = np.zeros_like(kern.gradient)
|
||||
if isinstance(Z, Param):
|
||||
dZ = np.zeros_like(Z.values)
|
||||
else:
|
||||
dZ = np.zeros_like(Z)
|
||||
dmu = np.zeros_like(mu)
|
||||
dS = np.zeros_like(S)
|
||||
for i in xrange(self.degree):
|
||||
if self.cache_K:
|
||||
X = Xs[i]
|
||||
else:
|
||||
X = self.locs[i]*S_sq+mu
|
||||
dL_dpsi0_i = dL_dpsi0*self.weights[i]
|
||||
kern.update_gradients_diag(dL_dpsi0_i, X)
|
||||
dtheta += kern.gradient
|
||||
dX = kern.gradients_X_diag(dL_dpsi0_i, X)
|
||||
Kfu = kern.K(X,Z)
|
||||
dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i]
|
||||
kern.update_gradients_full(dL_dkfu, X, Z)
|
||||
dtheta += kern.gradient
|
||||
dX_i, dZ_i = kern.gradients_X_X2(dL_dkfu, X, Z)
|
||||
dX += dX_i
|
||||
dZ += dZ_i
|
||||
dmu += dX
|
||||
dS += dX*self.locs[i]/(2.*S_sq)
|
||||
kern.gradient[:] = dtheta_old
|
||||
return dtheta, dZ, dmu, dS
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -8,7 +8,7 @@ The package for the Psi statistics computation of the linear kernel for Bayesian
|
|||
import numpy as np
|
||||
from ....util.linalg import tdot
|
||||
|
||||
def psicomputations(variance, Z, variational_posterior):
|
||||
def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
|
||||
"""
|
||||
Compute psi-statistics for ss-linear kernel
|
||||
"""
|
||||
|
|
@ -21,8 +21,12 @@ def psicomputations(variance, Z, variational_posterior):
|
|||
S = variational_posterior.variance
|
||||
|
||||
psi0 = (variance*(np.square(mu)+S)).sum(axis=1)
|
||||
psi1 = np.dot(mu,(variance*Z).T)
|
||||
psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T)
|
||||
Zv = variance * Z
|
||||
psi1 = np.dot(mu,Zv.T)
|
||||
if return_psi2_n:
|
||||
psi2 = psi1[:,:,None] * psi1[:,None,:] + np.dot(S[:,None,:] * Zv[None,:,:], Zv.T)
|
||||
else:
|
||||
psi2 = np.dot(S.sum(axis=0) * Zv, Zv.T) + tdot(psi1.T)
|
||||
|
||||
return psi0, psi1, psi2
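
For the linear kernel, the per-point psi2 used above has a simple closed form, psi2_n[m,m'] = Zv_m^T E_q[x_n x_n^T] Zv_m' with Zv = variance*Z; an explicit-loop sketch that mirrors the vectorised expression, illustrative only and not part of this changeset:

import numpy as np

def psi2_n_linear(variance, Z, mu, S):
    # Illustrative sketch: psi2[n, m, m'] = E_q[k(Z_m, x_n) k(x_n, Z_m')]
    # for k(z, x) = sum_q variance_q z_q x_q and q(x_n) = N(mu_n, diag(S_n))
    Zv = variance * Z                                  # M x Q
    N, M = mu.shape[0], Z.shape[0]
    psi2 = np.empty((N, M, M))
    for n in range(N):
        Exx = np.outer(mu[n], mu[n]) + np.diag(S[n])   # E_q[x_n x_n^T]
        psi2[n] = Zv.dot(Exx).dot(Zv.T)
    return psi2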
|
||||
|
||||
|
|
@ -59,19 +63,39 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S):
|
|||
|
||||
variance2 = np.square(variance)
|
||||
common_sum = np.dot(mu,(variance*Z).T)
|
||||
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
|
||||
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
|
||||
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
|
||||
Z2_expect = np.inner(common_sum,dL_dpsi2T)
|
||||
Z1_expect = np.dot(dL_dpsi2T,Z)
|
||||
if len(dL_dpsi2.shape)==2:
|
||||
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
|
||||
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
|
||||
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
|
||||
Z2_expect = np.inner(common_sum,dL_dpsi2T)
|
||||
Z1_expect = np.dot(dL_dpsi2T,Z)
|
||||
|
||||
dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
|
||||
dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
|
||||
|
||||
dL_dmu = common_expect*variance
|
||||
dL_dmu = common_expect*variance
|
||||
|
||||
dL_dS = np.empty(S.shape)
|
||||
dL_dS[:] = Z_expect*variance2
|
||||
dL_dS = np.empty(S.shape)
|
||||
dL_dS[:] = Z_expect*variance2
|
||||
|
||||
dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)
|
||||
dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)
|
||||
else:
|
||||
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
|
||||
dL_dpsi2_ = dL_dpsi2.sum(axis=0)
|
||||
Z_expect = (np.dot(dL_dpsi2.reshape(N*M,M),Z).reshape(N,M,Q)*Z[None,:,:]).sum(axis=1)
|
||||
dL_dpsi2T = dL_dpsi2_+dL_dpsi2_.T
|
||||
dL_dpsi2T_ = dL_dpsi2+np.swapaxes(dL_dpsi2, 1, 2)
|
||||
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
|
||||
common_expect_ = (common_sum[:,:,None]*np.dot(dL_dpsi2T_.reshape(N*M,M),Z).reshape(N,M,Q)).sum(axis=1)
|
||||
Z2_expect = (common_sum[:,:,None]*dL_dpsi2T_).sum(axis=1)
|
||||
Z1_expect = np.dot(dL_dpsi2T_.reshape(N*M,M),Z).reshape(N,M,Q)
|
||||
|
||||
dL_dvar = 2.*variance*(S*Z_expect).sum(axis=0)+(common_expect_*mu).sum(axis=0)
|
||||
|
||||
dL_dmu = common_expect_*variance
|
||||
|
||||
dL_dS = np.empty(S.shape)
|
||||
dL_dS[:] = variance2* Z_expect
|
||||
|
||||
dL_dZ = variance2*(S[:,None,:]*Z1_expect).sum(axis=0)+np.dot(Z2_expect.T,variance*mu)
|
||||
|
||||
return dL_dvar, dL_dmu, dL_dS, dL_dZ
|
||||
|
|
|
|||
|
|
@ -5,13 +5,7 @@ The module for psi-statistics for RBF kernel
|
|||
import numpy as np
|
||||
from GPy.util.caching import Cacher
|
||||
|
||||
def psicomputations(variance, lengthscale, Z, variational_posterior):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
def psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=False):
|
||||
# here are the "statistics" for psi0, psi1 and psi2
|
||||
# Produced intermediate results:
|
||||
# _psi1 NxM
|
||||
|
|
@ -21,16 +15,11 @@ def psicomputations(variance, lengthscale, Z, variational_posterior):
|
|||
psi0 = np.empty(mu.shape[0])
|
||||
psi0[:] = variance
|
||||
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
|
||||
psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
|
||||
psi2 = _psi2computations(variance, lengthscale, Z, mu, S)
|
||||
if not return_psi2_n: psi2 = psi2.sum(axis=0)
|
||||
return psi0, psi1, psi2
|
||||
|
||||
def __psi1computations(variance, lengthscale, Z, mu, S):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi1
|
||||
# Produced intermediate results:
|
||||
# _psi1 NxM
|
||||
|
|
@ -45,26 +34,19 @@ def __psi1computations(variance, lengthscale, Z, mu, S):
|
|||
return _psi1
|
||||
|
||||
def __psi2computations(variance, lengthscale, Z, mu, S):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi2
|
||||
# Produced intermediate results:
|
||||
# _psi2 MxM
|
||||
|
||||
N,M,Q = mu.shape[0], Z.shape[0], mu.shape[1]
|
||||
lengthscale2 = np.square(lengthscale)
|
||||
|
||||
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
|
||||
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
|
||||
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
|
||||
denom = 1./(2.*S+lengthscale2)
|
||||
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
|
||||
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+(2*(mu*denom).dot(Z_hat.reshape(M*M,Q).T) - denom.dot(np.square(Z_hat).reshape(M*M,Q).T)).reshape(N,M,M)
|
||||
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
|
||||
|
||||
|
||||
return _psi2
|
||||
|
||||
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
|
|
@ -86,13 +68,6 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
|
|||
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
|
||||
|
||||
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
|
||||
"""
|
||||
dL_dpsi1 - NxM
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi1
|
||||
# Produced intermediate results: dL_dparams w.r.t. psi1
|
||||
# _dL_dvariance 1
|
||||
|
|
@ -118,13 +93,6 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
|
|||
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
||||
|
||||
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
dL_dpsi2 - MxM
|
||||
"""
|
||||
# here are the "statistics" for psi2
|
||||
# Produced the derivatives w.r.t. psi2:
|
||||
# _dL_dvariance 1
|
||||
|
|
@ -157,5 +125,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
|
|||
|
||||
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
|
||||
|
||||
_psi1computations = Cacher(__psi1computations, limit=1)
|
||||
_psi2computations = Cacher(__psi2computations, limit=1)
|
||||
_psi1computations = Cacher(__psi1computations, limit=5)
|
||||
_psi2computations = Cacher(__psi2computations, limit=5)
|
||||
|
|
|
|||
|
|
@ -7,13 +7,6 @@ from ....util.caching import Cache_this
|
|||
from . import PSICOMP_RBF
|
||||
from ....util import gpu_init
|
||||
|
||||
try:
|
||||
import pycuda.gpuarray as gpuarray
|
||||
from pycuda.compiler import SourceModule
|
||||
from ....util.linalg_gpu import sum_axis
|
||||
except:
|
||||
pass
|
||||
|
||||
gpu_code = """
|
||||
// define THREADNUM
|
||||
|
||||
|
|
@ -241,7 +234,11 @@ gpu_code = """
|
|||
|
||||
class PSICOMP_RBF_GPU(PSICOMP_RBF):
|
||||
|
||||
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
|
||||
def __init__(self, threadnum=256, blocknum=30, GPU_direct=False):
|
||||
from pycuda.compiler import SourceModule
|
||||
from ....util.gpu_init import initGPU
|
||||
initGPU()
|
||||
|
||||
self.GPU_direct = GPU_direct
|
||||
self.gpuCache = None
|
||||
|
||||
|
|
@ -265,6 +262,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
|
|||
return s
|
||||
|
||||
def _initGPUCache(self, N, M, Q):
|
||||
import pycuda.gpuarray as gpuarray
|
||||
if self.gpuCache == None:
|
||||
self.gpuCache = {
|
||||
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
|
||||
|
|
@ -320,13 +318,14 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
|
|||
def get_dimensions(self, Z, variational_posterior):
|
||||
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
|
||||
|
||||
@Cache_this(limit=1, ignore_args=(0,))
|
||||
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
|
||||
@Cache_this(limit=5, ignore_args=(0,))
|
||||
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
"""
|
||||
variance, lengthscale = kern.variance, kern.lengthscale
|
||||
N,M,Q = self.get_dimensions(Z, variational_posterior)
|
||||
self._initGPUCache(N,M,Q)
|
||||
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
|
||||
|
|
@ -355,8 +354,10 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
|
|||
else:
|
||||
return psi0, psi1_gpu.get(), psi2_gpu.get()
|
||||
|
||||
@Cache_this(limit=1, ignore_args=(0,1,2,3))
|
||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
@Cache_this(limit=5, ignore_args=(0,2,3,4))
|
||||
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
variance, lengthscale = kern.variance, kern.lengthscale
|
||||
from ....util.linalg_gpu import sum_axis
|
||||
ARD = (len(lengthscale)!=1)
|
||||
|
||||
N,M,Q = self.get_dimensions(Z, variational_posterior)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from ....util.linalg import tdot
|
|||
|
||||
import numpy as np
|
||||
|
||||
def psicomputations(variance, Z, variational_posterior):
|
||||
def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
|
||||
"""
|
||||
Compute psi-statistics for ss-linear kernel
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -6,14 +6,7 @@ The module for psi-statistics for RBF kernel for Spike-and-Slab GPLVM
|
|||
import numpy as np
|
||||
from ....util.caching import Cache_this
|
||||
from . import PSICOMP_RBF
|
||||
from ....util import gpu_init
|
||||
|
||||
try:
|
||||
import pycuda.gpuarray as gpuarray
|
||||
from pycuda.compiler import SourceModule
|
||||
from ....util.linalg_gpu import sum_axis
|
||||
except:
|
||||
pass
|
||||
|
||||
gpu_code = """
|
||||
// define THREADNUM
|
||||
|
|
@ -292,6 +285,11 @@ gpu_code = """
|
|||
class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
|
||||
|
||||
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
|
||||
|
||||
from pycuda.compiler import SourceModule
|
||||
from ....util.gpu_init import initGPU
|
||||
initGPU()
|
||||
|
||||
self.GPU_direct = GPU_direct
|
||||
self.gpuCache = None
|
||||
|
||||
|
|
@ -315,6 +313,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
|
|||
return s
|
||||
|
||||
def _initGPUCache(self, N, M, Q):
|
||||
import pycuda.gpuarray as gpuarray
|
||||
if self.gpuCache == None:
|
||||
self.gpuCache = {
|
||||
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
|
||||
|
|
@ -377,12 +376,13 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
|
|||
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
|
||||
|
||||
@Cache_this(limit=1, ignore_args=(0,))
|
||||
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
|
||||
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
"""
|
||||
variance, lengthscale = kern.variance, kern.lengthscale
|
||||
N,M,Q = self.get_dimensions(Z, variational_posterior)
|
||||
self._initGPUCache(N,M,Q)
|
||||
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
|
@ -409,8 +409,10 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
|
|||
else:
|
||||
return psi0, psi1_gpu.get(), psi2_gpu.get()
|
||||
|
||||
@Cache_this(limit=1, ignore_args=(0,1,2,3))
|
||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
@Cache_this(limit=1, ignore_args=(0,2,3,4))
|
||||
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
variance, lengthscale = kern.variance, kern.lengthscale
|
||||
from ....util.linalg_gpu import sum_axis
|
||||
ARD = (len(lengthscale)!=1)
|
||||
|
||||
N,M,Q = self.get_dimensions(Z, variational_posterior)
|
||||
|
|
|
|||
|
|
@ -31,6 +31,9 @@ class RBF(Stationary):
|
|||
def dK_dr(self, r):
|
||||
return -r*self.K_of_r(r)
|
||||
|
||||
def dK2_drdr(self, r):
|
||||
return (r**2-1)*self.K_of_r(r)
|
||||
|
||||
def __getstate__(self):
|
||||
dc = super(RBF, self).__getstate__()
|
||||
if self.useGPU:
|
||||
|
|
@ -50,22 +53,25 @@ class RBF(Stationary):
|
|||
#---------------------------------------#
|
||||
|
||||
def psi0(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
|
||||
|
||||
def psi1(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
|
||||
|
||||
def psi2(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
|
||||
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
|
||||
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[:2]
|
||||
self.variance.gradient = dL_dvar
|
||||
self.lengthscale.gradient = dL_dlengscale
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
|
||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
|
||||
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:]
|
||||
|
||||
|
|
|
|||
52
GPy/kern/_src/spline.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Copyright (c) 2015, Thomas Hornung
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
class Spline(Kern):
|
||||
"""
|
||||
Linear spline kernel. You need to specify 2 parameters: the variance and c.
|
||||
The variance is defined in powers of 10. Thus specifying -2 means 10^-2.
|
||||
The parameter c defines the stiffness of the spline fit. A very stiff
|
||||
spline equals linear regression.
|
||||
See https://www.youtube.com/watch?v=50Vgw11qn0o starting at minute 1:17:28
|
||||
Lit: Wahba, 1990
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance=1., c=1., active_dims=None, name='spline'):
|
||||
super(Spline, self).__init__(input_dim, active_dims, name)
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
self.c = Param('c', c)
|
||||
self.link_parameters(self.variance,self.c)
|
||||
|
||||
|
||||
def K(self, X, X2=None):
|
||||
if X2 is None: X2=X
|
||||
term1 = (X+8.)*(X2.T+8.)/16.
|
||||
term2 = abs((X-X2.T)/16.)**3
|
||||
term3 = ((X+8.)/16.)**3 + ((X2.T+8.)/16.)**3
|
||||
return (self.variance**2 * (1. + (1.+self.c) * term1 + self.c/3. * (term2 - term3)))
|
||||
|
||||
def Kdiag(self, X):
|
||||
term1 = np.square(X+8.)/16.
|
||||
term3 = 2. * ((X+8.)/16.)**3
|
||||
return (self.variance**2 * (1. + (1.+self.c) * term1 - self.c/3. * term3))[:,0]
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
if X2 is None: X2=X
|
||||
term1 = (X+8.)*(X2.T+8.)/16.
|
||||
term2 = abs((X-X2.T)/16.)**3
|
||||
term3 = ((X+8.)/16.)**3 + ((X2.T+8.)/16.)**3
|
||||
self.variance.gradient = np.sum(dL_dK * (2*self.variance * (1. + (1.+self.c) * term1 + self.c/3. * ( term2 - term3))))
|
||||
self.c.gradient = np.sum(dL_dK * (self.variance**2* (term1 + 1./3.*(term2 - term3))))
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
raise NotImplementedError
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
raise NotImplementedError
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
raise NotImplementedError
|
||||
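As a quick, hedged usage sketch of the new spline kernel (only the class above is part of this diff; the import path and the GPRegression wiring are assumptions for illustration):
```
# Illustrative only: exercises the Spline kernel defined above on a toy
# 1-D regression. The (X+8)/16 terms above suggest inputs roughly in [-8, 8].
import numpy as np
import GPy
from GPy.kern._src.spline import Spline  # assumed import path for this new file

X = np.linspace(-8., 8., 50)[:, None]
Y = np.sin(X) + 0.1 * np.random.randn(50, 1)

k = Spline(input_dim=1, variance=1., c=1.)
m = GPy.models.GPRegression(X, Y, k)
m.optimize()        # uses update_gradients_full; gradients_X is not implemented
print(m)
```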
GPy/kern/_src/standard_periodic.py (new file, 166 lines)
@@ -0,0 +1,166 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
"""
|
||||
The standard periodic kernel which mentioned in:
|
||||
|
||||
[1] Gaussian Processes for Machine Learning, C. E. Rasmussen, C. K. I. Williams.
|
||||
The MIT Press, 2005.
|
||||
|
||||
|
||||
[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
|
||||
Neural Networks and Machine Learning, pages 133-165. Springer, 1998.
|
||||
"""
|
||||
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
||||
import numpy as np
|
||||
|
||||
class StdPeriodic(Kern):
|
||||
"""
|
||||
Standard periodic kernel
|
||||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim} \left( \frac{\sin\left(\frac{\pi}{\lambda_i} (x_i - y_i)\right)}{l_i} \right)^2 \right]
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\theta_1` in the formula above
|
||||
:type variance: float
|
||||
:param wavelength: the vector of wavelengths :math:`\lambda_i`. If None then 1.0 is assumed.
|
||||
:type wavelength: array or list of the appropriate size (or float if there is only one wavelength parameter)
|
||||
:param lengthscale: the vector of lengthscales :math:`l_i`. If None then 1.0 is assumed.
|
||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
||||
:param ARD1: Auto Relevance Determination with respect to wavelength.
If set to False, a single wavelength parameter :math:`\lambda` is shared
across all dimensions; otherwise there is one wavelength parameter
per dimension.
|
||||
:type ARD1: Boolean
|
||||
:param ARD2: Auto Relevance Determination with respect to lengthscale.
If set to False, a single lengthscale parameter :math:`l` is shared
across all dimensions; otherwise there is one lengthscale parameter
per dimension.
|
||||
:type ARD2: Boolean
|
||||
:param active_dims: indices of dimensions which are used in the computation of the kernel
|
||||
:type active_dims: array or list of the appropriate size
|
||||
:param name: Name of the kernel for output
|
||||
:type name: str
:param useGPU: whether or not to use the GPU
:type useGPU: bool
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance=1., wavelength=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
|
||||
super(StdPeriodic, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
|
||||
self.input_dim = input_dim
|
||||
self.ARD1 = ARD1 # correspond to wavelengths
|
||||
self.ARD2 = ARD2 # correspond to lengthscales
|
||||
|
||||
self.name = name
|
||||
|
||||
if self.ARD1 == False:
|
||||
if wavelength is not None:
|
||||
wavelength = np.asarray(wavelength)
|
||||
assert wavelength.size == 1, "Only one wavelength needed for non-ARD kernel"
|
||||
else:
|
||||
wavelength = np.ones(1)
|
||||
else:
|
||||
if wavelength is not None:
|
||||
wavelength = np.asarray(wavelength)
|
||||
assert wavelength.size == input_dim, "bad number of wavelengths"
|
||||
else:
|
||||
wavelength = np.ones(input_dim)
|
||||
|
||||
if self.ARD2 == False:
|
||||
if lengthscale is not None:
|
||||
lengthscale = np.asarray(lengthscale)
|
||||
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
|
||||
else:
|
||||
lengthscale = np.ones(1)
|
||||
else:
|
||||
if lengthscale is not None:
|
||||
lengthscale = np.asarray(lengthscale)
|
||||
assert lengthscale.size == input_dim, "bad number of lengthscales"
|
||||
else:
|
||||
lengthscale = np.ones(input_dim)
|
||||
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
assert self.variance.size==1, "Variance size must be one"
|
||||
self.wavelengths = Param('wavelengths', wavelength, Logexp())
|
||||
self.lengthscales = Param('lengthscales', lengthscale, Logexp())
|
||||
|
||||
self.link_parameters(self.variance, self.wavelengths, self.lengthscales)
|
||||
|
||||
def parameters_changed(self):
|
||||
"""
|
||||
This function acts as a callback for each optimization iteration.
If an optimization step was successful and the parameters have changed,
this callback will be called so that any precomputations for the kernel
can be updated.
|
||||
"""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def K(self, X, X2=None):
|
||||
"""Compute the covariance matrix between X and X2."""
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
|
||||
base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
|
||||
exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscales ), axis = -1 ) )
|
||||
|
||||
return self.variance * exp_dist
|
||||
|
||||
|
||||
def Kdiag(self, X):
|
||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
ret = np.empty(X.shape[0])
|
||||
ret[:] = self.variance
|
||||
return ret
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
|
||||
base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
|
||||
|
||||
sin_base = np.sin( base )
|
||||
exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscales ), axis = -1 ) )
|
||||
|
||||
dwl = self.variance * (1.0/np.square(self.lengthscales)) * sin_base*np.cos(base) * (base / self.wavelengths)
|
||||
|
||||
dl = self.variance * np.square( sin_base) / np.power( self.lengthscales, 3)
|
||||
|
||||
self.variance.gradient = np.sum(exp_dist * dL_dK)
|
||||
#target[0] += np.sum( exp_dist * dL_dK)
|
||||
|
||||
if self.ARD1: # different wavelengths
|
||||
self.wavelengths.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
|
||||
else: # same wavelengths
|
||||
self.wavelengths.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)
|
||||
|
||||
if self.ARD2: # different lengthscales
|
||||
self.lengthscales.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
|
||||
else: # same lengthscales
|
||||
self.lengthscales.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
|
||||
self.variance.gradient = np.sum(dL_dKdiag)
|
||||
self.wavelengths.gradient = 0
|
||||
self.lengthscales.gradient = 0
|
||||
|
||||
# def gradients_X(self, dL_dK, X, X2=None):
|
||||
# """derivative of the covariance matrix with respect to X."""
|
||||
#
|
||||
# raise NotImplemented("Periodic kernel: dK_dX not implemented")
|
||||
#
|
||||
# def gradients_X_diag(self, dL_dKdiag, X):
|
||||
#
|
||||
# raise NotImplemented("Periodic kernel: dKdiag_dX not implemented")
|
||||
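A small sanity-check sketch of the standard periodic kernel defined above (import path follows the new file location; nothing below is part of the diff):
```
# The covariance should be invariant to shifting one argument by a whole
# wavelength, and its diagonal should equal the variance.
import numpy as np
from GPy.kern._src.standard_periodic import StdPeriodic

k = StdPeriodic(input_dim=1, variance=1.5, wavelength=2.0, lengthscale=0.7)
X = np.linspace(0., 4., 9)[:, None]

K = k.K(X)
assert np.allclose(k.K(X, X + 2.0), K)   # period = wavelength
assert np.allclose(k.Kdiag(X), 1.5)
```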
|
|
@ -24,6 +24,13 @@ class Static(Kern):
|
|||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
return np.zeros(X.shape)
|
||||
|
||||
def gradients_XX(self, dL_dK, X, X2):
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
|
||||
def gradients_XX_diag(self, dL_dKdiag, X):
|
||||
return np.zeros(X.shape)
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
return np.zeros(Z.shape)
|
||||
|
||||
|
|
@ -59,6 +66,9 @@ class White(Static):
|
|||
def psi2(self, Z, variational_posterior):
|
||||
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
if X2 is None:
|
||||
self.variance.gradient = np.trace(dL_dK)
|
||||
|
|
@ -92,6 +102,11 @@ class Bias(Static):
|
|||
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
|
||||
return ret
|
||||
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
ret = np.empty((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||
ret[:] = self.variance*self.variance
|
||||
return ret
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()*variational_posterior.shape[0]
|
||||
|
||||
|
|
@ -120,6 +135,9 @@ class Fixed(Static):
|
|||
def psi2(self, Z, variational_posterior):
|
||||
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
self.variance.gradient = dL_dpsi0.sum()
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from ...util.caching import Cache_this
|
|||
try:
|
||||
from . import stationary_cython
|
||||
except ImportError:
|
||||
print('warning in sationary: failed to import cython module: falling back to numpy')
|
||||
print('warning in stationary: failed to import cython module: falling back to numpy')
|
||||
config.set('cython', 'working', 'false')
|
||||
|
||||
|
||||
|
|
@ -25,13 +25,16 @@ class Stationary(Kern):
|
|||
|
||||
Stationary covariance functions depend only on r, where r is defined as
|
||||
|
||||
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
|
||||
.. math::
|
||||
r(x, x') = \\sqrt{ \\sum_{q=1}^Q (x_q - x'_q)^2 }
|
||||
|
||||
The covariance function k(x, x') can then be written k(r).
|
||||
|
||||
In this implementation, r is scaled by the lengthscales parameter(s):
|
||||
|
||||
r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
|
||||
.. math::
|
||||
|
||||
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.
|
||||
|
||||
By default, there's only one lengthscale: separate lengthscales for each
dimension can be enabled by setting ARD=True.
|
||||
|
|
@ -39,11 +42,12 @@ class Stationary(Kern):
|
|||
To implement a stationary covariance function using this class, one need
|
||||
only define the covariance function k(r), and its derivative.
|
||||
|
||||
...
|
||||
def K_of_r(self, r):
|
||||
return foo
|
||||
def dK_dr(self, r):
|
||||
return bar
|
||||
```
|
||||
def K_of_r(self, r):
|
||||
return foo
|
||||
def dK_dr(self, r):
|
||||
return bar
|
||||
```
|
||||
|
||||
The lengthscale(s) and variance parameters are added to the structure automatically.
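For instance, a minimal subclass following this recipe (essentially re-deriving the OU covariance that appears later in this file) might look like the sketch below; the import path mirrors this file's location and is otherwise an assumption:
```
import numpy as np
from GPy.kern._src.stationary import Stationary

class MyExponential(Stationary):
    """k(r) = variance * exp(-r); lengthscale/variance handling is inherited."""
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False,
                 active_dims=None, name='my_exp'):
        super(MyExponential, self).__init__(input_dim, variance, lengthscale,
                                            ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * np.exp(-r)

    def dK_dr(self, r):
        return -self.variance * np.exp(-r)
```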
|
||||
|
||||
|
|
@ -77,6 +81,10 @@ class Stationary(Kern):
|
|||
def dK_dr(self, r):
|
||||
raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
|
||||
|
||||
@Cache_this(limit=20, ignore_args=())
|
||||
def dK2_drdr(self, r):
|
||||
raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
|
||||
|
||||
@Cache_this(limit=5, ignore_args=())
|
||||
def K(self, X, X2=None):
|
||||
"""
|
||||
|
|
@ -89,11 +97,16 @@ class Stationary(Kern):
|
|||
r = self._scaled_dist(X, X2)
|
||||
return self.K_of_r(r)
|
||||
|
||||
@Cache_this(limit=3, ignore_args=())
|
||||
@Cache_this(limit=20, ignore_args=())
|
||||
def dK_dr_via_X(self, X, X2):
|
||||
#a convenience function, so we can cache dK_dr
|
||||
return self.dK_dr(self._scaled_dist(X, X2))
|
||||
|
||||
@Cache_this(limit=3, ignore_args=())
|
||||
def dK2_drdr_via_X(self, X, X2):
|
||||
#a convenience function, so we can cache dK_dr
|
||||
return self.dK2_drdr(self._scaled_dist(X, X2))
|
||||
|
||||
def _unscaled_dist(self, X, X2=None):
|
||||
"""
|
||||
Compute the Euclidean distance between each row of X and X2, or between
|
||||
|
|
@ -114,12 +127,13 @@ class Stationary(Kern):
|
|||
r2 = np.clip(r2, 0, np.inf)
|
||||
return np.sqrt(r2)
|
||||
|
||||
@Cache_this(limit=5, ignore_args=())
|
||||
@Cache_this(limit=20, ignore_args=())
|
||||
def _scaled_dist(self, X, X2=None):
|
||||
"""
|
||||
Efficiently compute the scaled distance, r.
|
||||
|
||||
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
|
||||
.. math::
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 / \ell_q^2 }
|
||||
|
||||
Note that if there is only one lengthscale, l comes outside the sum. In
|
||||
this case we compute the unscaled distance first (in a separate
|
||||
|
|
@ -201,6 +215,59 @@ class Stationary(Kern):
|
|||
else:
|
||||
return self._gradients_X_pure(dL_dK, X, X2)
|
||||
|
||||
def gradients_XX(self, dL_dK, X, X2=None):
|
||||
"""
|
||||
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
|
||||
|
||||
.. math::
|
||||
\frac{\partial^2 K}{\partial X\partial X2}
|
||||
|
||||
:returns:
|
||||
dL2_dXdX2: NxMxQ, for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None)
|
||||
Thus, we return the second derivative in X2.
|
||||
"""
|
||||
# The off diagonals in Q are always zero, this should also be true for the Linear kernel...
|
||||
# According to multivariable chain rule, we can chain the second derivative through r:
|
||||
# d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
|
||||
invdist = self._inv_dist(X, X2)
|
||||
invdist2 = invdist**2
|
||||
|
||||
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
|
||||
tmp1 = dL_dr * invdist
|
||||
|
||||
dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
|
||||
tmp2 = dL_drdr * invdist2
|
||||
|
||||
l2 = np.ones(X.shape[1]) * self.lengthscale**2
|
||||
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
tmp1 -= np.eye(X.shape[0])*self.variance
|
||||
else:
|
||||
tmp1[X==X2.T] -= self.variance
|
||||
|
||||
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
|
||||
#grad = np.empty(X.shape, dtype=np.float64)
|
||||
for q in range(self.input_dim):
|
||||
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
|
||||
grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
|
||||
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
|
||||
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
|
||||
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
|
||||
return grad
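Written out, the chain rule that the comments above refer to is (a restatement of the code, not new maths):
```
\frac{\partial^2 k}{\partial x_q\,\partial x'_q}
  = \frac{\mathrm{d}k}{\mathrm{d}r}\,\frac{\partial^2 r}{\partial x_q\,\partial x'_q}
  + \frac{\mathrm{d}^2 k}{\mathrm{d}r^2}\,
    \frac{\partial r}{\partial x_q}\,\frac{\partial r}{\partial x'_q},
\qquad
r = \sqrt{\sum_{q=1}^{Q} \frac{(x_q - x'_q)^2}{\ell_q^2}}
```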
|
||||
|
||||
def gradients_XX_diag(self, dL_dK, X):
|
||||
"""
|
||||
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
|
||||
|
||||
.. math::
|
||||
\frac{\partial^2 K}{\partial X\partial X2}
|
||||
|
||||
:returns:
|
||||
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ]
|
||||
"""
|
||||
return np.ones(X.shape) * self.variance/self.lengthscale**2
|
||||
|
||||
def _gradients_X_pure(self, dL_dK, X, X2=None):
|
||||
invdist = self._inv_dist(X, X2)
|
||||
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
|
||||
|
|
@ -259,7 +326,7 @@ class OU(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -279,7 +346,7 @@ class Matern32(Stationary):
|
|||
|
||||
.. math::
|
||||
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
|
||||
|
||||
"""
|
||||
|
||||
|
|
@ -326,7 +393,7 @@ class Matern52(Stationary):
|
|||
.. math::
|
||||
|
||||
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
|
||||
"""
|
||||
"""
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
|
||||
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
|
|
@ -4,14 +4,15 @@
|
|||
import numpy as np
|
||||
cimport numpy as np
|
||||
from cython.parallel import prange
|
||||
cimport cython
|
||||
|
||||
ctypedef np.float64_t DTYPE_t
|
||||
|
||||
cdef extern from "stationary_utils.h":
|
||||
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
|
||||
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad) nogil
|
||||
|
||||
cdef extern from "stationary_utils.h":
|
||||
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
|
||||
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad) nogil
|
||||
|
||||
def grad_X(int N, int D, int M,
|
||||
np.ndarray[DTYPE_t, ndim=2] _X,
|
||||
|
|
@ -22,18 +23,18 @@ def grad_X(int N, int D, int M,
|
|||
cdef double *X2 = <double*> _X2.data
|
||||
cdef double *tmp = <double*> _tmp.data
|
||||
cdef double *grad = <double*> _grad.data
|
||||
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
|
||||
with nogil:
|
||||
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
|
||||
|
||||
@cython.cdivision(True)
|
||||
def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
|
||||
cdef int n,d,nd,m
|
||||
for nd in prange(N*D, nogil=True):
|
||||
n = nd/D
|
||||
d = nd%D
|
||||
for nd in prange(N * D, nogil=True):
|
||||
n = nd / D
|
||||
d = nd % D
|
||||
grad[n,d] = 0.0
|
||||
for m in range(M):
|
||||
grad[n,d] += tmp[n,m]*(X[n,d]-X2[m,d])
|
||||
|
||||
|
||||
grad[n,d] += tmp[n, m] * (X[n, d] - X2[m, d])
|
||||
|
||||
def lengthscale_grads_in_c(int N, int M, int Q,
|
||||
np.ndarray[DTYPE_t, ndim=2] _tmp,
|
||||
|
|
@ -44,16 +45,16 @@ def lengthscale_grads_in_c(int N, int M, int Q,
|
|||
cdef double *X = <double*> _X.data
|
||||
cdef double *X2 = <double*> _X2.data
|
||||
cdef double *grad = <double*> _grad.data
|
||||
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
|
||||
with nogil:
|
||||
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
|
||||
|
||||
def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
|
||||
cdef int q, n, m
|
||||
cdef double gradq, dist
|
||||
for q in range(Q):
|
||||
grad[q] = 0.0
|
||||
for n in range(N):
|
||||
for m in range(M):
|
||||
dist = X[n,q] - X2[m,q]
|
||||
grad[q] += tmp[n,m]*dist*dist
|
||||
|
||||
|
||||
with nogil:
|
||||
for q in range(Q):
|
||||
grad[q] = 0.0
|
||||
for n in range(N):
|
||||
for m in range(M):
|
||||
dist = X[n,q] - X2[m,q]
|
||||
grad[q] += tmp[n, m] * dist * dist
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
#ifndef __APPLE__
|
||||
#include <omp.h>
|
||||
#endif
|
||||
void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad);
|
||||
void _lengthscale_grads(int N, int D, int M, double* X, double* X2, double* tmp, double* grad);
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ class TruncLinear(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
|
|
@ -114,7 +114,7 @@ class TruncLinear_inf(Kern):
|
|||
|
||||
.. math::
|
||||
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
|
||||
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
from .bernoulli import Bernoulli
|
||||
from .exponential import Exponential
|
||||
from .gaussian import Gaussian
|
||||
from .gaussian import Gaussian, HeteroscedasticGaussian
|
||||
from .gamma import Gamma
|
||||
from .poisson import Poisson
|
||||
from .student_t import StudentT
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ class Bernoulli(Likelihood):
|
|||
gh_x, gh_w = gh_points
|
||||
|
||||
|
||||
gh_w = gh_w / np.sqrt(np.pi)
|
||||
shape = m.shape
|
||||
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
|
||||
Ysign = np.where(Y==1,1,-1)
|
||||
|
|
@ -232,6 +233,17 @@ class Bernoulli(Likelihood):
|
|||
np.seterr(**state)
|
||||
return d3logpdf_dlink3
|
||||
|
||||
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
|
||||
"""
|
||||
Get the "quantiles" of the binary labels (Bernoulli draws). all the
|
||||
quantiles must be either 0 or 1, since those are the only values the
|
||||
draw can take!
|
||||
"""
|
||||
p = self.predictive_mean(mu, var)
|
||||
return [np.asarray(p>(q/100.), dtype=np.int32) for q in quantiles]
|
||||
|
||||
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ class Exponential(Likelihood):
|
|||
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
|
||||
return d3lik_dlink3
|
||||
|
||||
def samples(self, gp):
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ class Gaussian(Likelihood):
|
|||
|
||||
def betaY(self,Y,Y_metadata=None):
|
||||
#TODO: ~Ricardo this does not live here
|
||||
raise RuntimeError("Please notify the GPy developers, this should not happen")
|
||||
return Y/self.gaussian_variance(Y_metadata)
|
||||
|
||||
def gaussian_variance(self, Y_metadata=None):
|
||||
|
|
@ -315,9 +316,44 @@ class Gaussian(Likelihood):
|
|||
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
|
||||
|
||||
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||
if not isinstance(self.gp_link, link_functions.Identity):
|
||||
return super(Gaussian, self).variational_expectations(Y=Y, m=m, v=v, gh_points=gh_points, Y_metadata=Y_metadata)
|
||||
|
||||
lik_var = float(self.variance)
|
||||
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
|
||||
dF_dmu = (Y - m)/lik_var
|
||||
dF_dv = np.ones_like(v)*(-0.5/lik_var)
|
||||
dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
|
||||
return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
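The closed form used here is the Gaussian expectation of the log-density under q(f) = N(m, v) with an identity link; restating the code above in maths:
```
F = \mathbb{E}_{q(f)}\big[\log \mathcal{N}(y \mid f, \sigma^2)\big]
  = -\tfrac{1}{2}\log(2\pi\sigma^2) - \frac{y^2 + m^2 + v - 2my}{2\sigma^2},
\qquad
\frac{\partial F}{\partial m} = \frac{y - m}{\sigma^2},\quad
\frac{\partial F}{\partial v} = -\frac{1}{2\sigma^2},\quad
\frac{\partial F}{\partial \sigma^2} = -\frac{1}{2\sigma^2}
   + \frac{y^2 + m^2 + v - 2my}{2\sigma^4}
```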
|
||||
|
||||
class HeteroscedasticGaussian(Gaussian):
|
||||
def __init__(self, Y_metadata, gp_link=None, variance=1., name='het_Gauss'):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
|
||||
if not isinstance(gp_link, link_functions.Identity):
|
||||
print("Warning, Exact inference is not implemeted for non-identity link functions,\
|
||||
if you are not already, ensure Laplace inference_method is used")
|
||||
|
||||
super(HeteroscedasticGaussian, self).__init__(gp_link, np.ones(Y_metadata['output_index'].shape)*variance, name)
|
||||
|
||||
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
|
||||
return dL_dKdiag[Y_metadata['output_index']]
|
||||
|
||||
def gaussian_variance(self, Y_metadata=None):
|
||||
return self.variance[Y_metadata['output_index'].flatten()]
|
||||
|
||||
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
|
||||
_s = self.variance[Y_metadata['output_index'].flatten()]
|
||||
if full_cov:
|
||||
if var.ndim == 2:
|
||||
var += np.eye(var.shape[0])*_s
|
||||
if var.ndim == 3:
|
||||
var += np.atleast_3d(np.eye(var.shape[0])*_s)
|
||||
else:
|
||||
var += _s
|
||||
return mu, var
|
||||
|
||||
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
|
||||
_s = self.variance[Y_metadata['output_index'].flatten()]
|
||||
return [stats.norm.ppf(q/100.)*np.sqrt(var + _s) + mu for q in quantiles]
|
||||
|
|
|
|||
|
|
@ -607,7 +607,7 @@ class Likelihood(Parameterized):
|
|||
pred_mean = self.predictive_mean(mu, var, Y_metadata=Y_metadata)
|
||||
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata=Y_metadata)
|
||||
except NotImplementedError:
|
||||
print "Finding predictive mean and variance via sampling rather than quadrature"
|
||||
print("Finding predictive mean and variance via sampling rather than quadrature")
|
||||
Nf_samp = 300
|
||||
Ny_samp = 1
|
||||
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
|
||||
|
|
@ -622,7 +622,7 @@ class Likelihood(Parameterized):
|
|||
Nf_samp = 300
|
||||
Ny_samp = 1
|
||||
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
|
||||
ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
|
||||
ss_y = self.samples(s, Y_metadata)#, samples=Ny_samp)
|
||||
#ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp)
|
||||
|
||||
pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
import scipy
|
||||
from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
|
||||
import scipy as sp
|
||||
from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
|
||||
|
|
@ -140,7 +141,7 @@ class Log_ex_1(GPTransformation):
|
|||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return np.log1p(safe_exp(f))
|
||||
return scipy.special.log1p(safe_exp(f))
|
||||
|
||||
def dtransf_df(self,f):
|
||||
ef = safe_exp(f)
|
||||
|
|
|
|||
|
|
@ -145,5 +145,7 @@ class Poisson(Likelihood):
|
|||
"""
|
||||
orig_shape = gp.shape
|
||||
gp = gp.flatten()
|
||||
# Ysim = np.random.poisson(self.gp_link.transf(gp), [samples, gp.size]).T
|
||||
# return Ysim.reshape(orig_shape+(samples,))
|
||||
Ysim = np.random.poisson(self.gp_link.transf(gp))
|
||||
return Ysim.reshape(orig_shape)
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba
|
|||
import logging
|
||||
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
|
||||
from GPy.core.parameterization.param import Param
|
||||
from GPy.core.parameterization.observable_array import ObsAr
|
||||
|
||||
class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
||||
"""
|
||||
|
|
@ -80,46 +81,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
|||
"""Get the gradients of the posterior distribution of X in its specific form."""
|
||||
return X.mean.gradient, X.variance.gradient
|
||||
|
||||
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw):
|
||||
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw)
|
||||
|
||||
if self.has_uncertain_inputs():
|
||||
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
|
||||
variational_posterior=X,
|
||||
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
|
||||
dL_dpsi1=grad_dict['dL_dpsi1'],
|
||||
dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
else:
|
||||
current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], X, Z)
|
||||
current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], X)
|
||||
if subset_indices is not None:
|
||||
value_indices['Xgrad'] = subset_indices['samples']
|
||||
|
||||
kl_fctr = self.kl_factr
|
||||
if self.has_uncertain_inputs():
|
||||
if self.missing_data:
|
||||
d = self.output_dim
|
||||
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
|
||||
else:
|
||||
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
|
||||
|
||||
# Subsetting Variational Posterior objects, makes the gradients
|
||||
# empty. We need them to be 0 though:
|
||||
X.mean.gradient[:] = 0
|
||||
X.variance.gradient[:] = 0
|
||||
|
||||
self.variational_prior.update_gradients_KL(X)
|
||||
if self.missing_data:
|
||||
current_values['meangrad'] += kl_fctr*X.mean.gradient/d
|
||||
current_values['vargrad'] += kl_fctr*X.variance.gradient/d
|
||||
else:
|
||||
current_values['meangrad'] += kl_fctr*X.mean.gradient
|
||||
current_values['vargrad'] += kl_fctr*X.variance.gradient
|
||||
|
||||
if subset_indices is not None:
|
||||
value_indices['meangrad'] = subset_indices['samples']
|
||||
value_indices['vargrad'] = subset_indices['samples']
|
||||
return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
|
||||
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
|
||||
posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
|
||||
psi0=psi0, psi1=psi1, psi2=psi2, **kw)
|
||||
return posterior, log_marginal_likelihood, grad_dict
|
||||
|
||||
def _outer_values_update(self, full_values):
|
||||
"""
|
||||
|
|
@ -128,22 +93,47 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
|||
"""
|
||||
super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values)
|
||||
if self.has_uncertain_inputs():
|
||||
self.X.mean.gradient = full_values['meangrad']
|
||||
self.X.variance.gradient = full_values['vargrad']
|
||||
meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations(
|
||||
variational_posterior=self.X,
|
||||
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
||||
dL_dpsi1=full_values['dL_dpsi1'],
|
||||
dL_dpsi2=full_values['dL_dpsi2'],
|
||||
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
|
||||
|
||||
self.X.mean.gradient = meangrad_tmp
|
||||
self.X.variance.gradient = vargrad_tmp
|
||||
else:
|
||||
self.X.gradient = full_values['Xgrad']
|
||||
self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
|
||||
self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)
|
||||
|
||||
def _outer_init_full_values(self):
|
||||
if self.has_uncertain_inputs():
|
||||
return dict(meangrad=np.zeros(self.X.mean.shape),
|
||||
vargrad=np.zeros(self.X.variance.shape))
|
||||
else:
|
||||
return dict(Xgrad=np.zeros(self.X.shape))
|
||||
return super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
|
||||
|
||||
def parameters_changed(self):
|
||||
super(BayesianGPLVMMiniBatch,self).parameters_changed()
|
||||
if isinstance(self.inference_method, VarDTC_minibatch):
|
||||
return
|
||||
|
||||
kl_fctr = self.kl_factr
|
||||
if kl_fctr > 0:
|
||||
Xgrad = self.X.gradient.copy()
|
||||
self.X.gradient[:] = 0
|
||||
self.variational_prior.update_gradients_KL(self.X)
|
||||
|
||||
if self.missing_data or not self.stochastics:
|
||||
self.X.mean.gradient = kl_fctr*self.X.mean.gradient
|
||||
self.X.variance.gradient = kl_fctr*self.X.variance.gradient
|
||||
else:
|
||||
d = self.output_dim
|
||||
self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
|
||||
self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
|
||||
self.X.gradient += Xgrad
|
||||
|
||||
if self.missing_data or not self.stochastics:
|
||||
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
|
||||
elif self.stochastics:
|
||||
d = self.output_dim
|
||||
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
|
||||
|
||||
self._Xgrad = self.X.gradient.copy()
|
||||
|
||||
def plot_latent(self, labels=None, which_indices=None,
|
||||
resolution=50, ax=None, marker='o', s=40,
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2015 James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from ..core import GP
|
||||
from ..models import GPLVM
|
||||
from ..mappings import *
|
||||
from . import GPLVM
|
||||
from .. import mappings
|
||||
|
||||
|
||||
class BCGPLVM(GPLVM):
|
||||
|
|
@ -16,33 +16,31 @@ class BCGPLVM(GPLVM):
|
|||
:type Y: np.ndarray
|
||||
:param input_dim: latent dimensionality
|
||||
:type input_dim: int
|
||||
:param init: initialisation method for the latent space
|
||||
:type init: 'PCA'|'random'
|
||||
:param mapping: mapping for back constraint
|
||||
:type mapping: GPy.core.Mapping object
|
||||
|
||||
"""
|
||||
def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False, mapping=None):
|
||||
def __init__(self, Y, input_dim, kernel=None, mapping=None):
|
||||
|
||||
|
||||
if mapping is None:
|
||||
mapping = Kernel(X=Y, output_dim=input_dim)
|
||||
mapping = mappings.MLP(input_dim=Y.shape[1],
|
||||
output_dim=input_dim,
|
||||
hidden_dim=10)
|
||||
else:
|
||||
assert mapping.input_dim==Y.shape[1], "mapping input dim does not work for Y dimension"
|
||||
assert mapping.output_dim==input_dim, "mapping output dim does not work for self.input_dim"
|
||||
GPLVM.__init__(self, Y, input_dim, X=mapping.f(Y), kernel=kernel, name="bcgplvm")
|
||||
self.unlink_parameter(self.X)
|
||||
self.mapping = mapping
|
||||
GPLVM.__init__(self, Y, input_dim, init, X, kernel, normalize_Y)
|
||||
self.X = self.mapping.f(self.likelihood.Y)
|
||||
self.link_parameter(self.mapping)
|
||||
|
||||
def _get_param_names(self):
|
||||
return self.mapping._get_param_names() + GP._get_param_names(self)
|
||||
self.X = self.mapping.f(self.Y)
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.mapping._get_params(), GP._get_params(self)))
|
||||
def parameters_changed(self):
|
||||
self.X = self.mapping.f(self.Y)
|
||||
GP.parameters_changed(self)
|
||||
Xgradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
|
||||
self.mapping.update_gradients(Xgradient, self.Y)
|
||||
|
||||
def _set_params(self, x):
|
||||
self.mapping._set_params(x[:self.mapping.num_params])
|
||||
self.X = self.mapping.f(self.likelihood.Y)
|
||||
GP._set_params(self, x[self.mapping.num_params:])
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
dL_df = self.kern.gradients_X(self.dL_dK, self.X)
|
||||
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
|
||||
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,8 @@ class GPHeteroscedasticRegression(GP):
|
|||
:param X: input observations
|
||||
:param Y: observed values
|
||||
:param kernel: a GPy kernel, defaults to rbf
|
||||
|
||||
NB: This model does not infer the noise variance outside the training set
|
||||
"""
|
||||
def __init__(self, X, Y, kernel=None, Y_metadata=None):
|
||||
|
||||
|
|
@ -30,10 +32,7 @@ class GPHeteroscedasticRegression(GP):
|
|||
kernel = kern.RBF(X.shape[1])
|
||||
|
||||
#Likelihood
|
||||
#likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in range(Ny)]
|
||||
noise_terms = np.unique(Y_metadata['output_index'].flatten())
|
||||
likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in noise_terms]
|
||||
likelihood = likelihoods.MixedNoise(likelihoods_list=likelihoods_list)
|
||||
likelihood = likelihoods.HeteroscedasticGaussian(Y_metadata)
|
||||
|
||||
super(GPHeteroscedasticRegression, self).__init__(X,Y,kernel,likelihood, Y_metadata=Y_metadata)
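A hedged usage sketch of the model after this change; the shape convention for Y_metadata['output_index'] follows the likelihood code above, everything else is illustrative:
```
# One independent Gaussian noise variance per training point.
import numpy as np
import GPy

N = 40
X = np.linspace(0., 10., N)[:, None]
Y = np.sin(X) + np.random.randn(N, 1) * np.linspace(0.05, 0.5, N)[:, None]

Y_metadata = {'output_index': np.arange(N)[:, None]}   # one noise term per point

m = GPy.models.GPHeteroscedasticRegression(X, Y, Y_metadata=Y_metadata)
m.optimize()
mu, var = m.predict(X, Y_metadata=Y_metadata)  # noise is only defined at training inputs
```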
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# Copyright (c) 2014, James Hensman, Alan Saul
|
||||
# Distributed under the terms of the GNU General public License, see LICENSE.txt
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core.model import Model
|
||||
|
|
|
|||
|
|
@ -26,12 +26,12 @@ class GPRegression(GP):
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.):
|
||||
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1., mean_function=None):
|
||||
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(X.shape[1])
|
||||
|
||||
likelihood = likelihoods.Gaussian(variance=noise_var)
|
||||
|
||||
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)
|
||||
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)
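A short sketch of the new mean_function argument; using GPy.mappings.Linear as the parametric mean is an assumption for illustration:
```
import numpy as np
import GPy

X = np.random.rand(30, 1) * 10.
Y = 2. * X + np.sin(X) + 0.1 * np.random.randn(30, 1)

mf = GPy.mappings.Linear(input_dim=1, output_dim=1)       # learned linear trend
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1), mean_function=mf)
m.optimize()
```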
|
||||
|
||||
|
|
|
|||
|
|
@ -1,20 +1,17 @@
|
|||
# Copyright (c) 2014, James Hensman, Alan Saul
|
||||
# Distributed under the terms of the GNU General public License, see LICENSE.txt
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from scipy.special import erf
|
||||
from ..core.model import Model
|
||||
from ..core import GP
|
||||
from ..core.parameterization import ObsAr
|
||||
from .. import kern
|
||||
from ..core.parameterization.param import Param
|
||||
from ..util.linalg import pdinv
|
||||
from ..likelihoods import Gaussian
|
||||
from ..inference.latent_function_inference import VarGauss
|
||||
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
|
||||
class GPVariationalGaussianApproximation(Model):
|
||||
class GPVariationalGaussianApproximation(GP):
|
||||
"""
|
||||
The Variational Gaussian Approximation revisited
|
||||
|
||||
|
|
@ -26,70 +23,14 @@ class GPVariationalGaussianApproximation(Model):
|
|||
pages = {786--792},
|
||||
}
|
||||
"""
|
||||
def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
|
||||
Model.__init__(self,'Variational GP')
|
||||
if likelihood is None:
|
||||
likelihood = Gaussian()
|
||||
# accept the construction arguments
|
||||
self.X = ObsAr(X)
|
||||
self.Y = Y
|
||||
self.num_data, self.input_dim = self.X.shape
|
||||
self.Y_metadata = Y_metadata
|
||||
def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):
|
||||
|
||||
self.kern = kernel
|
||||
self.likelihood = likelihood
|
||||
self.link_parameter(self.kern)
|
||||
self.link_parameter(self.likelihood)
|
||||
num_data = Y.shape[0]
|
||||
self.alpha = Param('alpha', np.zeros((num_data,1))) # only one latent fn for now.
|
||||
self.beta = Param('beta', np.ones(num_data))
|
||||
|
||||
inf = VarGauss(self.alpha, self.beta)
|
||||
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)
|
||||
|
||||
self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
|
||||
self.beta = Param('beta', np.ones(self.num_data))
|
||||
self.link_parameter(self.alpha)
|
||||
self.link_parameter(self.beta)
|
||||
|
||||
def log_likelihood(self):
|
||||
return self._log_lik
|
||||
|
||||
def parameters_changed(self):
|
||||
K = self.kern.K(self.X)
|
||||
m = K.dot(self.alpha)
|
||||
KB = K*self.beta[:, None]
|
||||
BKB = KB*self.beta[None, :]
|
||||
A = np.eye(self.num_data) + BKB
|
||||
Ai, LA, _, Alogdet = pdinv(A)
|
||||
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior covariance: need full matrix for gradients
|
||||
var = np.diag(Sigma).reshape(-1,1)
|
||||
|
||||
F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
|
||||
self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
|
||||
dF_da = np.dot(K, dF_dm)
|
||||
SigmaB = Sigma*self.beta
|
||||
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
|
||||
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
|
||||
dKL_da = m
|
||||
A_A2 = Ai - Ai.dot(Ai)
|
||||
dKL_db = np.diag(np.dot(KB.T, A_A2))
|
||||
self._log_lik = F.sum() - KL
|
||||
self.alpha.gradient = dF_da - dKL_da
|
||||
self.beta.gradient = dF_db - dKL_db
|
||||
|
||||
# K-gradients
|
||||
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
|
||||
tmp = Ai*self.beta[:, None]/self.beta[None, :]
|
||||
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
|
||||
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
|
||||
|
||||
def _raw_predict(self, Xnew):
|
||||
"""
|
||||
Predict the function(s) at the new point(s) Xnew.
|
||||
|
||||
:param Xnew: The points at which to make a prediction
|
||||
:type Xnew: np.ndarray, Nnew x self.input_dim
|
||||
"""
|
||||
Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
|
||||
Kux = self.kern.K(self.X, Xnew)
|
||||
mu = np.dot(Kux.T, self.alpha)
|
||||
WiKux = np.dot(Wi, Kux)
|
||||
Kxx = self.kern.Kdiag(Xnew)
|
||||
var = Kxx - np.sum(WiKux*Kux, 0)
|
||||
|
||||
return mu, var.reshape(-1,1)
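In maths, _raw_predict above computes the usual Gaussian conditional with the variational term diag(beta^-2) in place of a homoscedastic noise variance (a restatement of the code, not an addition):
```
\mu_* = K_{*X}\,\alpha,
\qquad
\sigma^2_* = k_{**} \;-\; \operatorname{diag}\!\Big(K_{*X}\,
   \big(K_{XX} + \operatorname{diag}(\beta^{-2})\big)^{-1} K_{X*}\Big)
```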
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ class GPLVM(GP):
|
|||
likelihood = Gaussian()
|
||||
|
||||
super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM')
|
||||
|
||||
self.X = Param('latent_mean', X)
|
||||
self.link_parameter(self.X, index=0)
|
||||
|
||||
|
|
@ -43,27 +44,30 @@ class GPLVM(GP):
|
|||
super(GPLVM, self).parameters_changed()
|
||||
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
|
||||
|
||||
def jacobian(self,X):
|
||||
J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
||||
for i in range(self.output_dim):
|
||||
J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
|
||||
return J
|
||||
#def jacobian(self,X):
|
||||
# J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
||||
# for i in range(self.output_dim):
|
||||
# J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
|
||||
# return J
|
||||
|
||||
def magnification(self,X):
|
||||
target=np.zeros(X.shape[0])
|
||||
#J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
||||
J = self.jacobian(X)
|
||||
for i in range(X.shape[0]):
|
||||
target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
|
||||
return target
|
||||
#def magnification(self,X):
|
||||
# target=np.zeros(X.shape[0])
|
||||
# #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
||||
## J = self.jacobian(X)
|
||||
# for i in range(X.shape[0]):
|
||||
# target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
|
||||
# return target
|
||||
|
||||
def plot(self):
|
||||
assert self.likelihood.Y.shape[1] == 2
|
||||
pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable
|
||||
assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent"
|
||||
from matplotlib import pyplot as plt
|
||||
plt.scatter(self.Y[:, 0],
|
||||
self.Y[:, 1],
|
||||
40, self.X[:, 0].copy(),
|
||||
linewidth=0, cmap=plt.cm.jet)
|
||||
Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
|
||||
mu, _ = self.predict(Xnew)
|
||||
import pylab as pb
|
||||
pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
|
||||
plt.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
|
||||
|
||||
def plot_latent(self, labels=None, which_indices=None,
|
||||
resolution=50, ax=None, marker='o', s=40,
|
||||
|
|
@ -78,6 +82,3 @@ class GPLVM(GP):
|
|||
resolution, ax, marker, s,
|
||||
fignum, False, legend,
|
||||
plot_limits, aspect, updates, **kwargs)
|
||||
|
||||
def plot_magnification(self, *args, **kwargs):
|
||||
return util.plot_latent.plot_magnification(self, *args, **kwargs)
|
||||
|
|
|
|||
|
|
@ -251,7 +251,7 @@ class HessianChecker(GradientChecker):
|
|||
print(grad_string)
|
||||
|
||||
if plot:
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
fig, axes = pb.subplots(2, 2)
|
||||
max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
|
||||
min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))
|
||||
|
|
|
|||
|
|
@ -170,20 +170,19 @@ class MRD(BayesianGPLVMMiniBatch):
|
|||
self._log_marginal_likelihood += b._log_marginal_likelihood
|
||||
|
||||
self.logger.info('working on im <{}>'.format(hex(id(i))))
|
||||
self.Z.gradient[:] += b.full_values['Zgrad']
|
||||
grad_dict = b.full_values
|
||||
self.Z.gradient[:] += b.Z.gradient#full_values['Zgrad']
|
||||
#grad_dict = b.full_values
|
||||
|
||||
if self.has_uncertain_inputs():
|
||||
self.X.mean.gradient += grad_dict['meangrad']
|
||||
self.X.variance.gradient += grad_dict['vargrad']
|
||||
self.X.gradient += b._Xgrad
|
||||
else:
|
||||
self.X.gradient += grad_dict['Xgrad']
|
||||
self.X.gradient += b._Xgrad
|
||||
|
||||
if self.has_uncertain_inputs():
|
||||
# update for the KL divergence
|
||||
self.variational_prior.update_gradients_KL(self.X)
|
||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||
pass
|
||||
#if self.has_uncertain_inputs():
|
||||
# # update for the KL divergence
|
||||
# self.variational_prior.update_gradients_KL(self.X)
|
||||
# self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||
# pass
|
||||
|
||||
def log_likelihood(self):
|
||||
return self._log_marginal_likelihood
|
||||
|
|
|
|||
|
|
@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP):
|
|||
|
||||
if stochastic and missing_data:
|
||||
self.missing_data = True
|
||||
self.stochastics = SparseGPStochastics(self, batchsize)
|
||||
self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
|
||||
elif stochastic and not missing_data:
|
||||
self.missing_data = False
|
||||
self.stochastics = SparseGPStochastics(self, batchsize)
|
||||
self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
|
||||
elif missing_data:
|
||||
self.missing_data = True
|
||||
self.stochastics = SparseGPMissing(self)
|
||||
|
|
@ -80,7 +80,7 @@ class SparseGPMiniBatch(SparseGP):
|
|||
def has_uncertain_inputs(self):
|
||||
return isinstance(self.X, VariationalPosterior)
|
||||
|
||||
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs):
|
||||
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs):
|
||||
"""
|
||||
This is the standard part, which usually belongs in parameters_changed.
|
||||
|
||||
|
|
@ -99,47 +99,13 @@ class SparseGPMiniBatch(SparseGP):
|
|||
like them into this dictionary for inner use of the indices inside the
|
||||
algorithm.
|
||||
"""
|
||||
try:
|
||||
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs)
|
||||
except:
|
||||
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
|
||||
current_values = {}
|
||||
likelihood.update_gradients(grad_dict['dL_dthetaL'])
|
||||
current_values['likgrad'] = likelihood.gradient.copy()
|
||||
if subset_indices is None:
|
||||
subset_indices = {}
|
||||
if isinstance(X, VariationalPosterior):
|
||||
#gradients wrt kernel
|
||||
dL_dKmm = grad_dict['dL_dKmm']
|
||||
kern.update_gradients_full(dL_dKmm, Z, None)
|
||||
current_values['kerngrad'] = kern.gradient.copy()
|
||||
kern.update_gradients_expectations(variational_posterior=X,
|
||||
Z=Z,
|
||||
dL_dpsi0=grad_dict['dL_dpsi0'],
|
||||
dL_dpsi1=grad_dict['dL_dpsi1'],
|
||||
dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
current_values['kerngrad'] += kern.gradient
|
||||
|
||||
#gradients wrt Z
|
||||
current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
|
||||
current_values['Zgrad'] += kern.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi0'],
|
||||
grad_dict['dL_dpsi1'],
|
||||
grad_dict['dL_dpsi2'],
|
||||
Z=Z,
|
||||
variational_posterior=X)
|
||||
if psi2 is None:
|
||||
psi2_sum_n = None
|
||||
else:
|
||||
#gradients wrt kernel
|
||||
kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
|
||||
current_values['kerngrad'] = kern.gradient.copy()
|
||||
kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
|
||||
current_values['kerngrad'] += kern.gradient
|
||||
kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
|
||||
current_values['kerngrad'] += kern.gradient
|
||||
#gradients wrt Z
|
||||
current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
|
||||
current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
|
||||
return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
|
||||
psi2_sum_n = psi2.sum(axis=0)
|
||||
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
|
||||
dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
|
||||
return posterior, log_marginal_likelihood, grad_dict
|
||||
|
||||
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
|
||||
"""
|
||||
|
|
@ -173,7 +139,10 @@ class SparseGPMiniBatch(SparseGP):
|
|||
else:
|
||||
index = slice(None)
|
||||
if key in full_values:
|
||||
full_values[key][index] += current_values[key]
|
||||
try:
|
||||
full_values[key][index] += current_values[key]
|
||||
except:
|
||||
full_values[key] += current_values[key]
|
||||
else:
|
||||
full_values[key] = current_values[key]
|
||||
|
||||
|
|
@ -192,9 +161,41 @@ class SparseGPMiniBatch(SparseGP):
|
|||
Here you put the values, which were collected before in the right places.
|
||||
E.g. set the gradients of parameters, etc.
|
||||
"""
|
||||
self.likelihood.gradient = full_values['likgrad']
|
||||
self.kern.gradient = full_values['kerngrad']
|
||||
self.Z.gradient = full_values['Zgrad']
|
||||
if self.has_uncertain_inputs():
|
||||
#gradients wrt kernel
|
||||
dL_dKmm = full_values['dL_dKmm']
|
||||
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
|
||||
kgrad = self.kern.gradient.copy()
|
||||
self.kern.update_gradients_expectations(
|
||||
variational_posterior=self.X,
|
||||
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
||||
dL_dpsi1=full_values['dL_dpsi1'],
|
||||
dL_dpsi2=full_values['dL_dpsi2'])
|
||||
self.kern.gradient += kgrad
|
||||
|
||||
|
||||
#gradients wrt Z
|
||||
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
|
||||
self.Z.gradient += self.kern.gradients_Z_expectations(
|
||||
variational_posterior=self.X,
|
||||
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
|
||||
dL_dpsi1=full_values['dL_dpsi1'],
|
||||
dL_dpsi2=full_values['dL_dpsi2'])
|
||||
else:
|
||||
#gradients wrt kernel
|
||||
self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
|
||||
kgrad = self.kern.gradient.copy()
|
||||
self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
|
||||
kgrad += self.kern.gradient
|
||||
self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
|
||||
self.kern.gradient += kgrad
|
||||
#kgrad += self.kern.gradient
|
||||
|
||||
#gradients wrt Z
|
||||
self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
|
||||
self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)
|
||||
|
||||
self.likelihood.update_gradients(full_values['dL_dthetaL'])
|
||||
|
||||
def _outer_init_full_values(self):
|
||||
"""
|
||||
|
|
@ -209,7 +210,15 @@ class SparseGPMiniBatch(SparseGP):
|
|||
to initialize the gradients for the mean and the variance in order to
|
||||
have the full gradient for indexing)
|
||||
"""
|
||||
return {}
|
||||
retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0])))
|
||||
if self.has_uncertain_inputs():
|
||||
retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]),
|
||||
dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])),
|
||||
dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))))
|
||||
else:
|
||||
retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]),
|
||||
'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))})
|
||||
return retd
|
||||
|
||||
def _outer_loop_for_missing_data(self):
|
||||
Lm = None
|
||||
|
|
@ -231,28 +240,36 @@ class SparseGPMiniBatch(SparseGP):
|
|||
print(message, end=' ')
|
||||
|
||||
for d, ninan in self.stochastics.d:
|
||||
|
||||
if not self.stochastics:
|
||||
print(' '*(len(message)) + '\r', end=' ')
|
||||
message = m_f(d)
|
||||
print(message, end=' ')
|
||||
|
||||
posterior, log_marginal_likelihood, \
|
||||
grad_dict, current_values, value_indices = self._inner_parameters_changed(
|
||||
psi0ni = self.psi0[ninan]
|
||||
psi1ni = self.psi1[ninan]
|
||||
if self.has_uncertain_inputs():
|
||||
psi2ni = self.psi2[ninan]
|
||||
value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan)
|
||||
else:
|
||||
psi2ni = None
|
||||
value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)
|
||||
|
||||
posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
|
||||
self.kern, self.X[ninan],
|
||||
self.Z, self.likelihood,
|
||||
self.Y_normalized[ninan][:, d], self.Y_metadata,
|
||||
Lm, dL_dKmm,
|
||||
subset_indices=dict(outputs=d, samples=ninan))
|
||||
psi0=psi0ni, psi1=psi1ni, psi2=psi2ni)
|
||||
|
||||
self._inner_take_over_or_update(self.full_values, current_values, value_indices)
|
||||
self._inner_values_update(current_values)
|
||||
# Fill out the full values by adding in the apporpriate grad_dict
|
||||
# values
|
||||
self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
|
||||
self._inner_values_update(grad_dict) # What is this for? -> MRD
|
||||
|
||||
Lm = posterior.K_chol
|
||||
dL_dKmm = grad_dict['dL_dKmm']
|
||||
woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
|
||||
woodbury_vector[:, d] = posterior.woodbury_vector
|
||||
self._log_marginal_likelihood += log_marginal_likelihood
|
||||
|
||||
if not self.stochastics:
|
||||
print('')
|
||||
|
||||
|
|
@ -260,10 +277,10 @@ class SparseGPMiniBatch(SparseGP):
|
|||
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
|
||||
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
|
||||
self._outer_values_update(self.full_values)
|
||||
if self.has_uncertain_inputs():
|
||||
self.kern.return_psi2_n = False
|
||||
|
||||
def _outer_loop_without_missing_data(self):
|
||||
self._log_marginal_likelihood = 0
|
||||
|
||||
if self.posterior is None:
|
||||
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
|
||||
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
|
||||
|
|
@ -271,17 +288,16 @@ class SparseGPMiniBatch(SparseGP):
|
|||
woodbury_inv = self.posterior._woodbury_inv
|
||||
woodbury_vector = self.posterior._woodbury_vector
|
||||
|
||||
d = self.stochastics.d
|
||||
posterior, log_marginal_likelihood, \
|
||||
grad_dict, self.full_values, _ = self._inner_parameters_changed(
|
||||
d = self.stochastics.d[0][0]
|
||||
posterior, log_marginal_likelihood, grad_dict= self._inner_parameters_changed(
|
||||
self.kern, self.X,
|
||||
self.Z, self.likelihood,
|
||||
self.Y_normalized[:, d], self.Y_metadata)
|
||||
self.grad_dict = grad_dict
|
||||
|
||||
self._log_marginal_likelihood += log_marginal_likelihood
|
||||
self._log_marginal_likelihood = log_marginal_likelihood
|
||||
|
||||
self._outer_values_update(self.full_values)
|
||||
self._outer_values_update(self.grad_dict)
|
||||
|
||||
woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
|
||||
woodbury_vector[:, d] = posterior.woodbury_vector
|
||||
|
|
@ -290,10 +306,23 @@ class SparseGPMiniBatch(SparseGP):
|
|||
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
|
||||
|
||||
def parameters_changed(self):
|
||||
#Compute the psi statistics for N once, but don't sum out N in psi2
|
||||
if self.has_uncertain_inputs():
|
||||
#psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
|
||||
#psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
|
||||
#psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
|
||||
self.psi0 = self.kern.psi0(self.Z, self.X)
|
||||
self.psi1 = self.kern.psi1(self.Z, self.X)
|
||||
self.psi2 = self.kern.psi2n(self.Z, self.X)
|
||||
else:
|
||||
self.psi0 = self.kern.Kdiag(self.X)
|
||||
self.psi1 = self.kern.K(self.X, self.Z)
|
||||
self.psi2 = None
|
||||
|
||||
if self.missing_data:
|
||||
self._outer_loop_for_missing_data()
|
||||
elif self.stochastics:
|
||||
self._outer_loop_without_missing_data()
|
||||
else:
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
|
||||
self._outer_values_update(self.full_values)
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
|
||||
self._outer_values_update(self.grad_dict)
|
||||
|
|
|
|||
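The parameters_changed hunk above computes the psi statistics once per call: psi0/psi1/psi2n for uncertain inputs, Kdiag/K otherwise. A minimal sketch of calling those kernel expectations directly; the toy sizes are purely illustrative, RBF and NormalPosterior are the standard GPy classes used elsewhere in this changeset:

import numpy as np
import GPy
from GPy.core.parameterization.variational import NormalPosterior

N, M, Q = 50, 10, 3
Z = np.random.randn(M, Q)                          # inducing inputs
qX = NormalPosterior(np.random.randn(N, Q),        # variational means
                     np.random.rand(N, Q) + 0.01)  # variational variances (positive)
kern = GPy.kern.RBF(Q, ARD=True)

psi0 = kern.psi0(Z, qX)   # shape (N,):     E_q[k(x_n, x_n)]
psi1 = kern.psi1(Z, qX)   # shape (N, M):   E_q[k(x_n, Z)]
psi2 = kern.psi2n(Z, qX)  # shape (N, M, M): per-point second moment, not summed over N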
|
|
@ -4,4 +4,8 @@
|
|||
try:
|
||||
from . import matplot_dep
|
||||
except (ImportError, NameError):
|
||||
print('Fail to load GPy.plotting.matplot_dep.')
|
||||
# Matplotlib not available
|
||||
import warnings
|
||||
warnings.warn(ImportWarning("Matplotlib not available, install newest version of Matplotlib for plotting"))
|
||||
#sys.modules['matplotlib'] =
|
||||
#sys.modules[__name__+'.matplot_dep'] = ImportWarning("Matplotlib not available, install newest version of Matplotlib for plotting")
|
||||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
|
||||
import matplotlib as mpl
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
import sys
|
||||
#sys.path.append('/home/james/mlprojects/sitran_cluster/')
|
||||
#from switch_pylab_backend import *
|
||||
|
|
@ -159,7 +159,7 @@ cdict_Alu = {'red' :((0./5,colorsRGB['Aluminium1'][0]/256.,colorsRGB['Aluminium1
|
|||
# cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
|
||||
# cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
|
||||
if __name__=='__main__':
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
pb.figure()
|
||||
pb.pcolor(pb.rand(10,10),cmap=cmap_RB)
|
||||
pb.colorbar()
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
|
||||
try:
|
||||
import Tango
|
||||
import pylab as pb
|
||||
#import Tango
|
||||
from matplotlib import pyplot as pb
|
||||
except:
|
||||
pass
|
||||
import numpy as np
|
||||
|
|
@ -17,11 +17,11 @@ def ax_default(fignum, ax):
|
|||
fig = ax.figure
|
||||
return fig, ax
|
||||
|
||||
def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw):
|
||||
def meanplot(x, mu, color='#3300FF', ax=None, fignum=None, linewidth=2,**kw):
|
||||
_, axes = ax_default(fignum, ax)
|
||||
return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
|
||||
|
||||
def gpplot(x, mu, lower, upper, edgecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue'], ax=None, fignum=None, **kwargs):
|
||||
def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, fignum=None, **kwargs):
|
||||
_, axes = ax_default(fignum, ax)
|
||||
|
||||
mu = mu.flatten()
|
||||
|
|
@ -47,6 +47,32 @@ def gpplot(x, mu, lower, upper, edgecol=Tango.colorsHex['darkBlue'], fillcol=Tan
|
|||
return plots
|
||||
|
||||
|
||||
def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs):
|
||||
_, axes = ax_default(fignum, ax)
|
||||
|
||||
mu = mu.flatten()
|
||||
x = x.flatten()
|
||||
lower = lower.flatten()
|
||||
upper = upper.flatten()
|
||||
|
||||
plots = []
|
||||
|
||||
if edgecol is None:
|
||||
edgecol='#3300FF'
|
||||
|
||||
if not 'alpha' in kwargs.keys():
|
||||
kwargs['alpha'] = 1.
|
||||
|
||||
|
||||
if not 'lw' in kwargs.keys():
|
||||
kwargs['lw'] = 1.
|
||||
|
||||
|
||||
plots.append(axes.errorbar(x,mu,yerr=np.vstack([mu-lower,upper-mu]),color=edgecol,**kwargs))
|
||||
plots[-1][0].remove()
|
||||
return plots
|
||||
|
||||
|
||||
def removeRightTicks(ax=None):
|
||||
ax = ax or pb.gca()
|
||||
for i, line in enumerate(ax.get_yticklines()):
|
||||
|
|
|
|||
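The new gperrors helper above draws vertical error bars from lower to upper around the posterior mean and removes the connecting line. A self-contained usage sketch; the toy data is an illustrative assumption, only the import path and call follow this changeset:

import numpy as np
from matplotlib import pyplot as plt
from GPy.plotting.matplot_dep.base_plots import gperrors

x = np.linspace(0, 1, 20)
mu = np.sin(2 * np.pi * x)
lower, upper = mu - 0.3, mu + 0.3      # e.g. 2.5% / 97.5% predictive quantiles

fig, ax = plt.subplots()
gperrors(x, mu, lower, upper, ax=ax)   # error bars centred on the mean at each x
plt.show()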
|
|
@ -9,7 +9,8 @@ import itertools
|
|||
try:
|
||||
import Tango
|
||||
from matplotlib.cm import get_cmap
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
from matplotlib import cm
|
||||
except:
|
||||
pass
|
||||
|
||||
|
|
@ -114,7 +115,7 @@ def plot_latent(model, labels=None, which_indices=None,
|
|||
|
||||
# create a function which computes the shading of latent space according to the output variance
|
||||
def plot_function(x):
|
||||
Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
|
||||
Xtest_full = np.zeros((x.shape[0], X.shape[1]))
|
||||
Xtest_full[:, [input_1, input_2]] = x
|
||||
_, var = model.predict(Xtest_full, **predict_kwargs)
|
||||
var = var[:, :1]
|
||||
|
|
@ -137,7 +138,7 @@ def plot_latent(model, labels=None, which_indices=None,
|
|||
view = ImshowController(ax, plot_function,
|
||||
(xmin, ymin, xmax, ymax),
|
||||
resolution, aspect=aspect, interpolation='bilinear',
|
||||
cmap=pb.cm.binary, **imshow_kwargs)
|
||||
cmap=cm.binary, **imshow_kwargs)
|
||||
|
||||
# make sure labels are in order of input:
|
||||
labels = np.asarray(labels)
|
||||
|
|
@ -192,17 +193,18 @@ def plot_latent(model, labels=None, which_indices=None,
|
|||
|
||||
if updates:
|
||||
try:
|
||||
ax.figure.canvas.show()
|
||||
fig.canvas.show()
|
||||
except Exception as e:
|
||||
print("Could not invoke show: {}".format(e))
|
||||
raw_input('Enter to continue')
|
||||
view.deactivate()
|
||||
#raw_input('Enter to continue')
|
||||
return view
|
||||
return ax
|
||||
|
||||
def plot_magnification(model, labels=None, which_indices=None,
|
||||
resolution=60, ax=None, marker='o', s=40,
|
||||
fignum=None, plot_inducing=False, legend=True,
|
||||
aspect='auto', updates=False):
|
||||
plot_limits=None,
|
||||
aspect='auto', updates=False, mean=True, covariance=True, kern=None):
|
||||
"""
|
||||
:param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
|
||||
:param resolution: the resolution of the grid on which to evaluate the predictive variance
|
||||
|
|
@ -210,6 +212,8 @@ def plot_magnification(model, labels=None, which_indices=None,
|
|||
if ax is None:
|
||||
fig = pb.figure(num=fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
else:
|
||||
fig = ax.figure
|
||||
Tango.reset()
|
||||
|
||||
if labels is None:
|
||||
|
|
@ -217,19 +221,90 @@ def plot_magnification(model, labels=None, which_indices=None,
|
|||
|
||||
input_1, input_2 = most_significant_input_dimensions(model, which_indices)
|
||||
|
||||
# first, plot the output variance as a function of the latent space
|
||||
Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
|
||||
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
|
||||
#fetch the data points X that we'd like to plot
|
||||
X = model.X
|
||||
if isinstance(X, VariationalPosterior):
|
||||
X = X.mean
|
||||
else:
|
||||
X = X
|
||||
|
||||
if X.shape[0] > 1000:
|
||||
print("Warning: subsampling X, as it has more samples then 1000. X.shape={!s}".format(X.shape))
|
||||
subsample = np.random.choice(X.shape[0], size=1000, replace=False)
|
||||
X = X[subsample]
|
||||
labels = labels[subsample]
|
||||
#=======================================================================
|
||||
# <<<WORK IN PROGRESS>>>
|
||||
# <<<DO NOT DELETE>>>
|
||||
# plt.close('all')
|
||||
# fig, ax = plt.subplots(1,1)
|
||||
# from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions
|
||||
# import matplotlib.patches as mpatches
|
||||
# i1, i2 = most_significant_input_dimensions(m, None)
|
||||
# xmin, xmax = 100, -100
|
||||
# ymin, ymax = 100, -100
|
||||
# legend_handles = []
|
||||
#
|
||||
# X = m.X.mean[:, [i1, i2]]
|
||||
# X = m.X.variance[:, [i1, i2]]
|
||||
#
|
||||
# xmin = X[:,0].min(); xmax = X[:,0].max()
|
||||
# ymin = X[:,1].min(); ymax = X[:,1].max()
|
||||
# range_ = [[xmin, xmax], [ymin, ymax]]
|
||||
# ul = np.unique(labels)
|
||||
#
|
||||
# for i, l in enumerate(ul):
|
||||
# #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])],
|
||||
# # green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])],
|
||||
# # blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])],
|
||||
# # alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)])
|
||||
# #cmap = LinearSegmentedColormap('{}'.format(l), cdict)
|
||||
# cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255)
|
||||
# cmap._init()
|
||||
# #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
|
||||
# alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
|
||||
# cmap._lut[:, -1] = alphas
|
||||
# print l
|
||||
# x, y = X[labels==l].T
|
||||
#
|
||||
# heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_)
|
||||
# #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
|
||||
#
|
||||
# im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l))
|
||||
# legend_handles.append(mpatches.Patch(color=colors[i], label=l))
|
||||
# ax.set_xlim(xmin, xmax)
|
||||
# ax.set_ylim(ymin, ymax)
|
||||
# plt.legend(legend_handles, [l.get_label() for l in legend_handles])
|
||||
# plt.draw()
|
||||
# plt.show()
|
||||
#=======================================================================
|
||||
|
||||
#Create an ImshowController that can re-plot the latent space shading at a good resolution
|
||||
if plot_limits is None:
|
||||
xmin, ymin = X[:, [input_1, input_2]].min(0)
|
||||
xmax, ymax = X[:, [input_1, input_2]].max(0)
|
||||
x_r, y_r = xmax-xmin, ymax-ymin
|
||||
xmin -= .1*x_r
|
||||
xmax += .1*x_r
|
||||
ymin -= .1*y_r
|
||||
ymax += .1*y_r
|
||||
else:
|
||||
try:
|
||||
xmin, xmax, ymin, ymax = plot_limits
|
||||
except (TypeError, ValueError) as e:
|
||||
raise e.__class__("Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits))
|
||||
|
||||
|
||||
def plot_function(x):
|
||||
Xtest_full = np.zeros((x.shape[0], X.shape[1]))
|
||||
Xtest_full[:, [input_1, input_2]] = x
|
||||
mf=model.magnification(Xtest_full)
|
||||
mf = model.predict_magnification(Xtest_full, kern=kern, mean=mean, covariance=covariance)
|
||||
return mf
|
||||
|
||||
view = ImshowController(ax, plot_function,
|
||||
tuple(model.X.min(0)[:, [input_1, input_2]]) + tuple(model.X.max(0)[:, [input_1, input_2]]),
|
||||
(xmin, ymin, xmax, ymax),
|
||||
resolution, aspect=aspect, interpolation='bilinear',
|
||||
cmap=pb.cm.gray)
|
||||
cmap=cm.get_cmap('Greys'))
|
||||
|
||||
# make sure labels are in order of input:
|
||||
ulabels = []
|
||||
|
|
@ -245,17 +320,17 @@ def plot_magnification(model, labels=None, which_indices=None,
|
|||
elif type(ul) is np.int64:
|
||||
this_label = 'class %i' % ul
|
||||
else:
|
||||
this_label = 'class %i' % i
|
||||
this_label = unicode(ul)
|
||||
m = marker.next()
|
||||
|
||||
index = np.nonzero(labels == ul)[0]
|
||||
if model.input_dim == 1:
|
||||
x = model.X[index, input_1]
|
||||
x = X[index, input_1]
|
||||
y = np.zeros(index.size)
|
||||
else:
|
||||
x = model.X[index, input_1]
|
||||
y = model.X[index, input_2]
|
||||
ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
|
||||
x = X[index, input_1]
|
||||
y = X[index, input_2]
|
||||
ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.2, edgecolor='k', alpha=.9)
|
||||
|
||||
ax.set_xlabel('latent dimension %i' % input_1)
|
||||
ax.set_ylabel('latent dimension %i' % input_2)
|
||||
|
|
@ -263,19 +338,29 @@ def plot_magnification(model, labels=None, which_indices=None,
|
|||
if not np.all(labels == 1.) and legend:
|
||||
ax.legend(loc=0, numpoints=1)
|
||||
|
||||
ax.set_xlim(xmin[0], xmax[0])
|
||||
ax.set_ylim(xmin[1], xmax[1])
|
||||
ax.grid(b=False) # remove the grid if present, it doesn't look good
|
||||
ax.set_aspect('auto') # set a nice aspect ratio
|
||||
ax.set_xlim((xmin, xmax))
|
||||
ax.set_ylim((ymin, ymax))
|
||||
|
||||
if plot_inducing:
|
||||
ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w')
|
||||
if plot_inducing and hasattr(model, 'Z'):
|
||||
Z = model.Z
|
||||
ax.scatter(Z[:, input_1], Z[:, input_2], c='w', s=18, marker="^", edgecolor='k', linewidth=.3, alpha=.7)
|
||||
|
||||
try:
|
||||
fig.canvas.draw()
|
||||
fig.tight_layout()
|
||||
fig.canvas.draw()
|
||||
except Exception as e:
|
||||
print("Could not invoke tight layout: {}".format(e))
|
||||
pass
|
||||
|
||||
if updates:
|
||||
fig.canvas.show()
|
||||
raw_input('Enter to continue')
|
||||
|
||||
pb.title('Magnification Factor')
|
||||
try:
|
||||
fig.canvas.draw()
|
||||
fig.canvas.show()
|
||||
except Exception as e:
|
||||
print("Could not invoke show: {}".format(e))
|
||||
#raw_input('Enter to continue')
|
||||
return view
|
||||
return ax
|
||||
|
||||
|
||||
|
|
@ -314,8 +399,8 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
|
|||
this_label = 'class %i' % i
|
||||
m = marker.next()
|
||||
index = np.nonzero(data_labels == ul)[0]
|
||||
x = model.X[index, input_1]
|
||||
y = model.X[index, input_2]
|
||||
x = X[index, input_1]
|
||||
y = X[index, input_2]
|
||||
ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label)
|
||||
|
||||
ax.set_xlabel('latent dimension %i' % input_1)
|
||||
|
|
@ -323,7 +408,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
|
|||
|
||||
controller = ImAnnotateController(ax,
|
||||
plot_function,
|
||||
tuple(model.X.min(0)[:, significant_dims]) + tuple(model.X.max(0)[:, significant_dims]),
|
||||
tuple(X.min(0)[:, significant_dims]) + tuple(X.max(0)[:, significant_dims]),
|
||||
resolution=resolution,
|
||||
aspect=aspect,
|
||||
cmap=get_cmap('jet'),
|
||||
|
|
|
|||
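plot_magnification above gains plot_limits handling and routes the shading through model.predict_magnification. A hedged usage sketch for a Bayesian GPLVM; the synthetic data and optimisation budget are illustrative assumptions, the keyword names follow the signature in this hunk:

import numpy as np
import GPy
from GPy.plotting.matplot_dep import dim_reduction_plots

Y = np.random.randn(60, 8)               # toy high-dimensional observations
labels = np.random.randint(0, 2, 60)     # optional class labels for colouring
m = GPy.models.BayesianGPLVM(Y, 3)
m.optimize(messages=False, max_iters=50)

ax = dim_reduction_plots.plot_magnification(m, labels=labels, resolution=40,
                                            plot_inducing=True, legend=True)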
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
try:
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
except:
|
||||
pass
|
||||
#import numpy as np
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
import Tango
|
||||
from matplotlib.textpath import TextPath
|
||||
from matplotlib.transforms import offset_copy
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@ class AxisEventController(object):
|
|||
def __init__(self, ax):
|
||||
self.ax = ax
|
||||
self.activate()
|
||||
def __del__(self):
|
||||
self.deactivate()
|
||||
return self
|
||||
def deactivate(self):
|
||||
for cb_class in self.ax.callbacks.callbacks.values():
|
||||
for cb_num in cb_class.keys():
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
import numpy as np
|
||||
try:
|
||||
import Tango
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
except:
|
||||
pass
|
||||
from base_plots import x_frame1D, x_frame2D
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
import numpy as np
|
||||
try:
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
from matplotlib.patches import Polygon
|
||||
from matplotlib.collections import PatchCollection
|
||||
#from matplotlib import cm
|
||||
|
|
|
|||
|
|
@ -1,25 +1,82 @@
|
|||
# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
try:
|
||||
import Tango
|
||||
import pylab as pb
|
||||
except:
|
||||
pass
|
||||
import numpy as np
|
||||
from base_plots import gpplot, x_frame1D, x_frame2D
|
||||
from . import Tango
|
||||
from .base_plots import gpplot, x_frame1D, x_frame2D,gperrors
|
||||
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
|
||||
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
|
||||
from ...models.warped_gp import WarpedGP
|
||||
from scipy import sparse
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
|
||||
def plot_data(model, which_data_rows='all',
|
||||
which_data_ycols='all', visible_dims=None,
|
||||
fignum=None, ax=None, data_symbol='kx',mew=1.5):
|
||||
"""
|
||||
Plot the training data
|
||||
- For more than two input dimensions, use fixed_inputs to plot the data points with some of the inputs fixed.
|
||||
|
||||
Can plot only part of the data
|
||||
using which_data_rows and which_data_ycols.
|
||||
|
||||
:param which_data_rows: which of the training data to plot (default all)
|
||||
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
|
||||
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
|
||||
:param visible_dims: an array specifying the input dimensions to plot (maximum two)
|
||||
:type visible_dims: a numpy array
|
||||
:param fignum: figure to plot on.
|
||||
:type fignum: figure number
|
||||
:param ax: axes to plot on.
|
||||
:type ax: axes handle
|
||||
"""
|
||||
#deal with optional arguments
|
||||
if which_data_rows == 'all':
|
||||
which_data_rows = slice(None)
|
||||
if which_data_ycols == 'all':
|
||||
which_data_ycols = np.arange(model.output_dim)
|
||||
|
||||
if ax is None:
|
||||
fig = plt.figure(num=fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
|
||||
#data
|
||||
X = model.X
|
||||
Y = model.Y
|
||||
|
||||
#work out what the inputs are for plotting (1D or 2D)
|
||||
if visible_dims is None:
|
||||
visible_dims = np.arange(model.input_dim)
|
||||
assert visible_dims.size <= 2, "Visible inputs cannot be larger than two"
|
||||
free_dims = visible_dims
|
||||
plots = {}
|
||||
#one dimensional plotting
|
||||
if len(free_dims) == 1:
|
||||
|
||||
for d in which_data_ycols:
|
||||
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=mew)
|
||||
|
||||
#2D plotting
|
||||
elif len(free_dims) == 2:
|
||||
|
||||
for d in which_data_ycols:
|
||||
plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40,
|
||||
Y[which_data_rows, d], cmap=plt.cm.jet, vmin=Y.min(), vmax=Y.max(), linewidth=0.)
|
||||
|
||||
else:
|
||||
raise NotImplementedError("Cannot define a frame with more than two input dimensions")
|
||||
return plots
|
||||
|
||||
|
||||
def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||
which_data_ycols='all', fixed_inputs=[],
|
||||
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||
plot_raw=False,
|
||||
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
|
||||
apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None):
|
||||
apply_link=False, samples_y=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True):
|
||||
"""
|
||||
Plot the posterior of the GP.
|
||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||
|
|
@ -37,25 +94,32 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
:type which_data_ycols: 'all' or a list of integers
|
||||
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
||||
:type fixed_inputs: a list of tuples
|
||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||
:type resolution: int
|
||||
:param levels: number of levels to plot in a contour plot.
|
||||
:param levels: for 2D plotting, the number of contour levels to use; if ax is None, a new figure is created
|
||||
:type levels: int
|
||||
:param samples: the number of a posteriori samples to plot p(y*|y)
|
||||
:param samples: the number of a posteriori samples to plot p(f*|y)
|
||||
:type samples: int
|
||||
:param fignum: figure to plot on.
|
||||
:type fignum: figure number
|
||||
:param ax: axes to plot on.
|
||||
:type ax: axes handle
|
||||
:type output: integer (first output is 0)
|
||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||
:type resolution: int
|
||||
:param plot_raw: Whether to plot the raw function p(f|y)
|
||||
:type plot_raw: boolean
|
||||
:param linecol: color of line to plot.
|
||||
:type linecol:
|
||||
:type linecol: hex or color
|
||||
:param fillcol: color of fill
|
||||
:param levels: for 2D plotting, the number of contour levels to use; if ax is None, a new figure is created
|
||||
:param apply_link: apply the link function if plotting f (default false)
|
||||
:type fillcol: hex or color
|
||||
:param apply_link: apply the link function if plotting f (default false), as well as posterior samples if requested
|
||||
:type apply_link: boolean
|
||||
:param samples_f: the number of posteriori f samples to plot p(f*|y)
|
||||
:type samples_f: int
|
||||
:param samples_y: the number of posteriori f samples to plot p(y*|y)
|
||||
:type samples_y: int
|
||||
:param plot_uncertain_inputs: plot the uncertainty of the inputs as error bars if they have uncertainty (BGPLVM etc.)
|
||||
:type plot_uncertain_inputs: boolean
|
||||
:param predict_kw: keyword args for _raw_predict and predict functions if required
|
||||
:type predict_kw: dict
|
||||
:param plot_training_data: whether or not to plot the training points
|
||||
:type plot_training_data: boolean
|
||||
"""
|
||||
#deal with optional arguments
|
||||
if which_data_rows == 'all':
|
||||
|
|
@ -65,7 +129,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
#if len(which_data_ycols)==0:
|
||||
#raise ValueError('No data selected for plotting')
|
||||
if ax is None:
|
||||
fig = pb.figure(num=fignum)
|
||||
fig = plt.figure(num=fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
|
||||
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
|
||||
|
|
@ -117,31 +181,38 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
Y_metadata = {'output_index': extra_data}
|
||||
else:
|
||||
Y_metadata['output_index'] = extra_data
|
||||
|
||||
if isinstance(model, WarpedGP):
|
||||
m, v = model.predict(Xgrid, full_cov=False, median=True, Y_metadata=Y_metadata, **predict_kw)
|
||||
#print np.concatenate((Xgrid, m), axis=1)
|
||||
else:
|
||||
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
|
||||
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
|
||||
fmu, fv = model._raw_predict(Xgrid, full_cov=False, **predict_kw)
|
||||
lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata)
|
||||
|
||||
|
||||
|
||||
for d in which_data_ycols:
|
||||
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
|
||||
if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
|
||||
#if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
|
||||
if not plot_raw and plot_training_data:
|
||||
plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows,
|
||||
visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
|
||||
|
||||
|
||||
#optionally plot some samples
|
||||
if samples: #NOTE not tested with fixed_inputs
|
||||
Ysim = model.posterior_samples(Xgrid, samples, Y_metadata=Y_metadata)
|
||||
print Ysim.shape
|
||||
print Xnew.shape
|
||||
for yi in Ysim.T:
|
||||
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
||||
|
||||
if samples_f: #NOTE not tested with fixed_inputs
|
||||
Fsim = model.posterior_samples_f(Xgrid, samples_f)
|
||||
Fsim = model.posterior_samples_f(Xgrid, samples)
|
||||
if apply_link:
|
||||
Fsim = model.likelihood.gp_link.transf(Fsim)
|
||||
for fi in Fsim.T:
|
||||
plots['posterior_samples_f'] = ax.plot(Xnew, fi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||
plots['posterior_samples'] = ax.plot(Xnew, fi[:,None], '#3300FF', linewidth=0.25)
|
||||
#ax.plot(Xnew, fi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
||||
|
||||
if samples_y: #NOTE not tested with fixed_inputs
|
||||
Ysim = model.posterior_samples(Xgrid, samples_y, Y_metadata=Y_metadata)
|
||||
for yi in Ysim.T:
|
||||
plots['posterior_samples_y'] = ax.scatter(Xnew, yi[:,None], s=5, c=Tango.colorsHex['darkBlue'], marker='o', alpha=0.5)
|
||||
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
||||
|
||||
|
||||
|
|
@ -206,8 +277,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
|
||||
for d in which_data_ycols:
|
||||
m_d = m[:,d].reshape(resolution, resolution).T
|
||||
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||
if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
||||
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=plt.cm.jet)
|
||||
#if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
||||
if not plot_raw and plot_training_data:
|
||||
plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
||||
|
||||
#set the limits of the plot to some sensible values
|
||||
ax.set_xlim(xmin[0], xmax[0])
|
||||
|
|
@ -272,3 +345,82 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_
|
|||
return f_inputs
|
||||
else:
|
||||
return X
|
||||
|
||||
|
||||
def errorbars_trainset(model, which_data_rows='all',
|
||||
which_data_ycols='all', fixed_inputs=[],
|
||||
fignum=None, ax=None,
|
||||
linecol='red', data_symbol='kx',
|
||||
predict_kw=None, plot_training_data=True, **kwargs):
|
||||
|
||||
"""
|
||||
Plot the posterior error bars corresponding to the training data
|
||||
- For more than two input dimensions, use fixed_inputs to plot the data points with some of the inputs fixed.
|
||||
|
||||
Can plot only part of the data
|
||||
using which_data_rows and which_data_ycols.
|
||||
|
||||
:param which_data_rows: which of the training data to plot (default all)
|
||||
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
|
||||
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
|
||||
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
|
||||
:type fixed_inputs: a list of tuples
|
||||
:param fignum: figure to plot on.
|
||||
:type fignum: figure number
|
||||
:param ax: axes to plot on.
|
||||
:type ax: axes handle
|
||||
:param plot_training_data: whether or not to plot the training points
|
||||
:type plot_training_data: boolean
|
||||
"""
|
||||
|
||||
#deal with optional arguments
|
||||
if which_data_rows == 'all':
|
||||
which_data_rows = slice(None)
|
||||
if which_data_ycols == 'all':
|
||||
which_data_ycols = np.arange(model.output_dim)
|
||||
|
||||
if ax is None:
|
||||
fig = plt.figure(num=fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
|
||||
X = model.X
|
||||
Y = model.Y
|
||||
|
||||
if predict_kw is None:
|
||||
predict_kw = {}
|
||||
|
||||
|
||||
#work out what the inputs are for plotting (1D or 2D)
|
||||
fixed_dims = np.array([i for i,v in fixed_inputs])
|
||||
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
|
||||
plots = {}
|
||||
|
||||
#one dimensional plotting
|
||||
if len(free_dims) == 1:
|
||||
|
||||
m, v = model.predict(X, full_cov=False, Y_metadata=model.Y_metadata, **predict_kw)
|
||||
fmu, fv = model._raw_predict(X, full_cov=False, **predict_kw)
|
||||
lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=model.Y_metadata)
|
||||
|
||||
for d in which_data_ycols:
|
||||
plots['gperrors'] = gperrors(X, m[:, d], lower[:, d], upper[:, d], edgecol=linecol, ax=ax, fignum=fignum, **kwargs )
|
||||
if plot_training_data:
|
||||
plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows,
|
||||
visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
|
||||
|
||||
|
||||
#set the limits of the plot to some sensible values
|
||||
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
|
||||
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
||||
ax.set_xlim(X[:,free_dims].min(), X[:,free_dims].max())
|
||||
ax.set_ylim(ymin, ymax)
|
||||
|
||||
|
||||
elif len(free_dims) == 2:
|
||||
raise NotImplementedError("Not implemented yet")
|
||||
|
||||
|
||||
else:
|
||||
raise NotImplementedError("Cannot define a frame with more than two input dimensions")
|
||||
return plots
|
||||
|
|
|
|||
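plot_fit now separates samples_f (latent function draws) from samples_y (noisy output draws) and accepts plot_training_data, while errorbars_trainset plots posterior error bars at the training inputs. A minimal 1D regression sketch; the module name models_plots and the toy data are assumptions, the keyword names follow the signatures in this file:

import numpy as np
import GPy
from GPy.plotting.matplot_dep import models_plots  # assumed module name for this file

X = np.random.uniform(-3., 3., (40, 1))
Y = np.sin(X) + np.random.randn(40, 1) * 0.1
m = GPy.models.GPRegression(X, Y)
m.optimize()

models_plots.plot_fit(m, samples_f=3, samples_y=20, plot_training_data=True)
models_plots.errorbars_trainset(m, linecol='red')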
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
import numpy as np
|
||||
try:
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
except:
|
||||
pass
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
|
||||
|
||||
def plot(model, ax=None, fignum=None, Z_height=None, **kwargs):
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import pylab as pb, numpy as np
|
||||
from matplotlib import pyplot as pb
import numpy as np
|
||||
|
||||
def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)):
|
||||
"""
|
||||
|
|
|
|||
109
GPy/testing/bgplvm_minibatch_tests.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
'''
|
||||
Created on 4 Sep 2015
|
||||
|
||||
@author: maxz
|
||||
'''
|
||||
import unittest
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
class BGPLVMTest(unittest.TestCase):
|
||||
|
||||
|
||||
def setUp(self):
|
||||
np.random.seed(12345)
|
||||
X, W = np.random.normal(0,1,(100,6)), np.random.normal(0,1,(6,13))
|
||||
Y = X.dot(W) + np.random.normal(0, .1, (X.shape[0], W.shape[1]))
|
||||
self.inan = np.random.binomial(1, .1, Y.shape).astype(bool)
|
||||
self.X, self.W, self.Y = X,W,Y
|
||||
self.Q = 3
|
||||
self.m_full = GPy.models.BayesianGPLVM(Y, self.Q)
|
||||
|
||||
def test_lik_comparisons_m1_s0(self):
|
||||
# Test if the different implementations give the exact same likelihood as the full model.
|
||||
# All of the following settings should give the same likelihood and gradients as the full model:
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=False)
|
||||
m[:] = self.m_full[:]
|
||||
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
|
||||
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
||||
assert(m.checkgrad())
|
||||
|
||||
def test_predict_missing_data(self):
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
|
||||
m[:] = self.m_full[:]
|
||||
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
|
||||
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
||||
|
||||
self.assertRaises(NotImplementedError, m.predict, m.X, full_cov=True)
|
||||
|
||||
mu1, var1 = m.predict(m.X, full_cov=False)
|
||||
mu2, var2 = self.m_full.predict(self.m_full.X, full_cov=False)
|
||||
np.testing.assert_allclose(mu1, mu2)
|
||||
np.testing.assert_allclose(var1, var2)
|
||||
|
||||
mu1, var1 = m.predict(m.X.mean, full_cov=True)
|
||||
mu2, var2 = self.m_full.predict(self.m_full.X.mean, full_cov=True)
|
||||
np.testing.assert_allclose(mu1, mu2)
|
||||
np.testing.assert_allclose(var1[:,:,0], var2)
|
||||
|
||||
mu1, var1 = m.predict(m.X.mean, full_cov=False)
|
||||
mu2, var2 = self.m_full.predict(self.m_full.X.mean, full_cov=False)
|
||||
np.testing.assert_allclose(mu1, mu2)
|
||||
np.testing.assert_allclose(var1[:,[0]], var2)
|
||||
|
||||
def test_lik_comparisons_m0_s0(self):
|
||||
# Test if the different implementations give the exact same likelihood as the full model.
|
||||
# All of the following settings should give the same likelihood and gradients as the full model:
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=False)
|
||||
m[:] = self.m_full[:]
|
||||
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
|
||||
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
||||
assert(m.checkgrad())
|
||||
|
||||
def test_lik_comparisons_m1_s1(self):
|
||||
# Test if the different implementations give the exact same likelihood as the full model.
|
||||
# All of the following settings should give the same likelihood and gradients as the full model:
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
|
||||
m[:] = self.m_full[:]
|
||||
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
|
||||
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
||||
assert(m.checkgrad())
|
||||
|
||||
def test_lik_comparisons_m0_s1(self):
|
||||
# Test if the different implementations give the exact same likelihood as the full model.
|
||||
# All of the following settings should give the same likelihood and gradients as the full model:
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=True, batchsize=self.Y.shape[1])
|
||||
m[:] = self.m_full[:]
|
||||
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
|
||||
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
||||
assert(m.checkgrad())
|
||||
|
||||
def test_gradients_missingdata(self):
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=False, batchsize=self.Y.shape[1])
|
||||
assert(m.checkgrad())
|
||||
|
||||
def test_gradients_missingdata_stochastics(self):
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=1)
|
||||
assert(m.checkgrad())
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=4)
|
||||
assert(m.checkgrad())
|
||||
|
||||
def test_gradients_stochastics(self):
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=True, batchsize=1)
|
||||
assert(m.checkgrad())
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=True, batchsize=4)
|
||||
assert(m.checkgrad())
|
||||
|
||||
def test_predict(self):
|
||||
# Test if the different implementations give the exact same likelihood as the full model.
|
||||
# All of the following settings should give the same likelihood and gradients as the full model:
|
||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
|
||||
m[:] = self.m_full[:]
|
||||
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
|
||||
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
||||
assert(m.checkgrad())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
#import sys;sys.argv = ['', 'Test.testName']
|
||||
unittest.main()
|
||||
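The tests above only copy parameters across from the full model; they never feed Y with gaps. A sketch of how missing entries might actually be supplied, assuming (as the unused inan mask in setUp suggests) that missing values are encoded as NaN in Y:

import numpy as np
import GPy

np.random.seed(0)
X, W = np.random.normal(0, 1, (100, 6)), np.random.normal(0, 1, (6, 13))
Y = X.dot(W) + np.random.normal(0, .1, (100, 13))
inan = np.random.binomial(1, .1, Y.shape).astype(bool)
Y_missing = Y.copy()
Y_missing[inan] = np.nan    # assumption: NaNs mark the missing entries

m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(
    Y_missing, 3, missing_data=True, stochastic=True, batchsize=4)
m.optimize(messages=False, max_iters=20)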
37
GPy/testing/cacher_tests.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
'''
|
||||
Created on 4 Sep 2015
|
||||
|
||||
@author: maxz
|
||||
'''
|
||||
import unittest
|
||||
from GPy.util.caching import Cacher
|
||||
from pickle import PickleError
|
||||
|
||||
|
||||
class Test(unittest.TestCase):
|
||||
def setUp(self):
|
||||
def op(x):
|
||||
return x
|
||||
self.cache = Cacher(op, 1)
|
||||
|
||||
def test_pickling(self):
|
||||
self.assertRaises(PickleError, self.cache.__getstate__)
|
||||
self.assertRaises(PickleError, self.cache.__setstate__)
|
||||
|
||||
def test_copy(self):
|
||||
tmp = self.cache.__deepcopy__()
|
||||
assert(tmp.operation is self.cache.operation)
|
||||
self.assertEqual(tmp.limit, self.cache.limit)
|
||||
|
||||
def test_reset(self):
|
||||
self.cache.reset()
|
||||
self.assertDictEqual(self.cache.cached_input_ids, {}, )
|
||||
self.assertDictEqual(self.cache.cached_outputs, {}, )
|
||||
self.assertDictEqual(self.cache.inputs_changed, {}, )
|
||||
|
||||
def test_name(self):
|
||||
assert(self.cache.__name__ == self.cache.operation.__name__)
|
||||
|
||||
if __name__ == "__main__":
|
||||
#import sys;sys.argv = ['', 'Test.testName']
|
||||
unittest.main()
|
||||
|
|
@ -2,11 +2,21 @@ import numpy as np
|
|||
import scipy as sp
|
||||
from GPy.util import choleskies
|
||||
import GPy
|
||||
from ..util.config import config
|
||||
import unittest
|
||||
|
||||
try:
|
||||
from ..util import linalg_cython
|
||||
from ..util import choleskies_cython
|
||||
config.set('cython', 'working', 'True')
|
||||
except ImportError:
|
||||
config.set('cython', 'working', 'False')
|
||||
|
||||
"""
|
||||
These tests make sure that the opure python and cython codes work the same
|
||||
These tests make sure that the pure python and cython codes work the same
|
||||
"""
|
||||
|
||||
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
|
||||
class CythonTestChols(np.testing.TestCase):
|
||||
def setUp(self):
|
||||
self.flat = np.random.randn(45,5)
|
||||
|
|
@ -20,6 +30,7 @@ class CythonTestChols(np.testing.TestCase):
|
|||
A2 = choleskies._triang_to_flat_cython(self.triang)
|
||||
np.testing.assert_allclose(A1, A2)
|
||||
|
||||
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
|
||||
class test_stationary(np.testing.TestCase):
|
||||
def setUp(self):
|
||||
self.k = GPy.kern.RBF(10)
|
||||
|
|
@ -49,17 +60,16 @@ class test_stationary(np.testing.TestCase):
|
|||
g2 = self.k._lengthscale_grads_cython(self.dKxz, self.X, self.Z)
|
||||
np.testing.assert_allclose(g1, g2)
|
||||
|
||||
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
|
||||
class test_choleskies_backprop(np.testing.TestCase):
|
||||
def setUp(self):
|
||||
self.dL, self.L = np.random.randn(2, 100, 100)
|
||||
a =np.random.randn(10,12)
|
||||
A = a.dot(a.T)
|
||||
self.L = GPy.util.linalg.jitchol(A)
|
||||
self.dL = np.random.randn(10,10)
|
||||
def test(self):
|
||||
r1 = GPy.util.choleskies._backprop_gradient_pure(self.dL, self.L)
|
||||
r2 = GPy.util.choleskies.choleskies_cython.backprop_gradient(self.dL, self.L)
|
||||
r1 = choleskies._backprop_gradient_pure(self.dL, self.L)
|
||||
r2 = choleskies_cython.backprop_gradient(self.dL, self.L)
|
||||
r3 = choleskies_cython.backprop_gradient_par_c(self.dL, self.L)
|
||||
np.testing.assert_allclose(r1, r2)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
np.testing.assert_allclose(r1, r3)
|
||||
|
|
|
|||
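Both test classes above are skipped when the Cython extensions have not been built, using the same config flag the module-level try/except import sets. A short sketch of that detection pattern, purely illustrative:

from GPy.util.config import config

try:
    from GPy.util import choleskies_cython   # compiled extension, present only if built
    config.set('cython', 'working', 'True')
except ImportError:
    config.set('cython', 'working', 'False')

if config.getboolean('cython', 'working'):
    print("Cython backends available")
else:
    print("Falling back to pure-Python implementations")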
99
GPy/testing/gp_tests.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
'''
|
||||
Created on 4 Sep 2015
|
||||
|
||||
@author: maxz
|
||||
'''
|
||||
import unittest
|
||||
import numpy as np, GPy
|
||||
from GPy.core.parameterization.variational import NormalPosterior
|
||||
|
||||
class Test(unittest.TestCase):
|
||||
|
||||
|
||||
def setUp(self):
|
||||
np.random.seed(12345)
|
||||
self.N = 20
|
||||
self.N_new = 50
|
||||
self.D = 1
|
||||
self.X = np.random.uniform(-3., 3., (self.N, 1))
|
||||
self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
|
||||
self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
|
||||
|
||||
|
||||
def test_setxy_bgplvm(self):
|
||||
k = GPy.kern.RBF(1)
|
||||
m = GPy.models.BayesianGPLVM(self.Y, 2, kernel=k)
|
||||
mu, var = m.predict(m.X)
|
||||
X = m.X.copy()
|
||||
Xnew = NormalPosterior(m.X.mean[:10].copy(), m.X.variance[:10].copy())
|
||||
m.set_XY(Xnew, m.Y[:10])
|
||||
assert(m.checkgrad())
|
||||
m.set_XY(X, self.Y)
|
||||
mu2, var2 = m.predict(m.X)
|
||||
np.testing.assert_allclose(mu, mu2)
|
||||
np.testing.assert_allclose(var, var2)
|
||||
|
||||
def test_setxy_gplvm(self):
|
||||
k = GPy.kern.RBF(1)
|
||||
m = GPy.models.GPLVM(self.Y, 2, kernel=k)
|
||||
mu, var = m.predict(m.X)
|
||||
X = m.X.copy()
|
||||
Xnew = X[:10].copy()
|
||||
m.set_XY(Xnew, m.Y[:10])
|
||||
assert(m.checkgrad())
|
||||
m.set_XY(X, self.Y)
|
||||
mu2, var2 = m.predict(m.X)
|
||||
np.testing.assert_allclose(mu, mu2)
|
||||
np.testing.assert_allclose(var, var2)
|
||||
|
||||
def test_setxy_gp(self):
|
||||
k = GPy.kern.RBF(1)
|
||||
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
|
||||
mu, var = m.predict(m.X)
|
||||
X = m.X.copy()
|
||||
m.set_XY(m.X[:10], m.Y[:10])
|
||||
assert(m.checkgrad())
|
||||
m.set_XY(X, self.Y)
|
||||
mu2, var2 = m.predict(m.X)
|
||||
np.testing.assert_allclose(mu, mu2)
|
||||
np.testing.assert_allclose(var, var2)
|
||||
|
||||
def test_mean_function(self):
|
||||
from GPy.core.parameterization.param import Param
|
||||
from GPy.core.mapping import Mapping
|
||||
class Parabola(Mapping):
|
||||
def __init__(self, variance, degree=2, name='parabola'):
|
||||
super(Parabola, self).__init__(1, 1, name)
|
||||
self.variance = Param('variance', np.ones(degree+1) * variance)
|
||||
self.degree = degree
|
||||
self.link_parameter(self.variance)
|
||||
|
||||
def f(self, X):
|
||||
p = self.variance[0] * np.ones(X.shape)
|
||||
for i in range(1, self.degree+1):
|
||||
p += self.variance[i] * X**(i)
|
||||
return p
|
||||
|
||||
def gradients_X(self, dL_dF, X):
|
||||
grad = np.zeros(X.shape)
|
||||
for i in range(1, self.degree+1):
|
||||
grad += (i) * self.variance[i] * X**(i-1)
|
||||
return grad
|
||||
|
||||
def update_gradients(self, dL_dF, X):
|
||||
for i in range(self.degree+1):
|
||||
self.variance.gradient[i] = (dL_dF * X**(i)).sum(0)
|
||||
X = np.linspace(-2, 2, 100)[:, None]
|
||||
k = GPy.kern.RBF(1)
|
||||
k.randomize()
|
||||
p = Parabola(.3)
|
||||
p.randomize()
|
||||
Y = p.f(X) + np.random.multivariate_normal(np.zeros(X.shape[0]), k.K(X)+np.eye(X.shape[0])*1e-8)[:,None] + np.random.normal(0, .1, (X.shape[0], 1))
|
||||
m = GPy.models.GPRegression(X, Y, mean_function=p)
|
||||
m.randomize()
|
||||
assert(m.checkgrad())
|
||||
_ = m.predict(m.X)
|
||||
|
||||
if __name__ == "__main__":
|
||||
#import sys;sys.argv = ['', 'Test.testName']
|
||||
unittest.main()
|
||||
|
|
@ -8,11 +8,12 @@ The test cases for various inference algorithms
|
|||
import unittest, itertools
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
#np.seterr(invalid='raise')
|
||||
|
||||
class InferenceXTestCase(unittest.TestCase):
|
||||
|
||||
def genData(self):
|
||||
np.random.seed(1)
|
||||
D1,D2,N = 12,12,50
|
||||
|
||||
x = np.linspace(0, 4 * np.pi, N)[:, None]
|
||||
|
|
|
|||
|
|
@ -6,9 +6,16 @@ import numpy as np
|
|||
import GPy
|
||||
import sys
|
||||
from GPy.core.parameterization.param import Param
|
||||
from ..util.config import config
|
||||
|
||||
verbose = 0
|
||||
|
||||
try:
|
||||
from ..util import linalg_cython
|
||||
config.set('cython', 'working', 'True')
|
||||
except ImportError:
|
||||
config.set('cython', 'working', 'False')
|
||||
|
||||
|
||||
class Kern_check_model(GPy.core.Model):
|
||||
"""
|
||||
|
|
@ -245,6 +252,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
|||
continuous_kerns = ['RBF', 'Linear']
|
||||
self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
|
||||
|
||||
def test_MLP(self):
|
||||
k = GPy.kern.MLP(self.D,ARD=True)
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Matern32(self):
|
||||
k = GPy.kern.Matern32(self.D)
|
||||
k.randomize()
|
||||
|
|
@ -313,6 +325,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
|||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_standard_periodic(self):
|
||||
k = GPy.kern.StdPeriodic(self.D, self.D-1)
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
class KernelTestsMiscellaneous(unittest.TestCase):
|
||||
def setUp(self):
|
||||
N, D = 100, 10
|
||||
|
|
@ -366,6 +383,7 @@ class KernelTestsNonContinuous(unittest.TestCase):
|
|||
X2 = self.X2[self.X2[:,-1]!=2]
|
||||
self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
|
||||
|
||||
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
|
||||
class Coregionalize_cython_test(unittest.TestCase):
|
||||
"""
|
||||
Make sure that the coregionalize kernel work with and without cython enabled
|
||||
|
|
@ -432,6 +450,104 @@ class KernelTestsProductWithZeroValues(unittest.TestCase):
|
|||
self.assertFalse(np.any(np.isnan(target)),
|
||||
"Gradient resulted in NaN")
|
||||
|
||||
class Kernel_Psi_statistics_GradientTests(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
from GPy.core.parameterization.variational import NormalPosterior
|
||||
N,M,Q = 100,20,3
|
||||
|
||||
X = np.random.randn(N,Q)
|
||||
X_var = np.random.rand(N,Q)+0.01
|
||||
self.Z = np.random.randn(M,Q)
|
||||
self.qX = NormalPosterior(X, X_var)
|
||||
|
||||
self.w1 = np.random.randn(N)
|
||||
self.w2 = np.random.randn(N,M)
|
||||
self.w3 = np.random.randn(M,M)
|
||||
self.w3 = self.w3+self.w3.T
|
||||
self.w3n = np.random.randn(N,M,M)
|
||||
self.w3n = self.w3n+np.swapaxes(self.w3n, 1,2)
|
||||
|
||||
def test_kernels(self):
|
||||
from GPy.kern import RBF,Linear,MLP
|
||||
Q = self.Z.shape[1]
|
||||
kernels = [RBF(Q,ARD=True), Linear(Q,ARD=True)]
|
||||
|
||||
for k in kernels:
|
||||
k.randomize()
|
||||
self._test_kernel_param(k)
|
||||
self._test_Z(k)
|
||||
self._test_qX(k)
|
||||
self._test_kernel_param(k, psi2n=True)
|
||||
self._test_Z(k, psi2n=True)
|
||||
self._test_qX(k, psi2n=True)
|
||||
|
||||
def _test_kernel_param(self, kernel, psi2n=False):
|
||||
|
||||
def f(p):
|
||||
kernel.param_array[:] = p
|
||||
psi0 = kernel.psi0(self.Z, self.qX)
|
||||
psi1 = kernel.psi1(self.Z, self.qX)
|
||||
if not psi2n:
|
||||
psi2 = kernel.psi2(self.Z, self.qX)
|
||||
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
|
||||
else:
|
||||
psi2 = kernel.psi2n(self.Z, self.qX)
|
||||
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
|
||||
|
||||
def df(p):
|
||||
kernel.param_array[:] = p
|
||||
kernel.update_gradients_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, self.Z, self.qX)
|
||||
return kernel.gradient.copy()
|
||||
|
||||
from GPy.models import GradientChecker
|
||||
m = GradientChecker(f, df, kernel.param_array.copy())
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
def _test_Z(self, kernel, psi2n=False):
|
||||
|
||||
def f(p):
|
||||
psi0 = kernel.psi0(p, self.qX)
|
||||
psi1 = kernel.psi1(p, self.qX)
|
||||
psi2 = kernel.psi2(p, self.qX)
|
||||
if not psi2n:
|
||||
psi2 = kernel.psi2(p, self.qX)
|
||||
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
|
||||
else:
|
||||
psi2 = kernel.psi2n(p, self.qX)
|
||||
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
|
||||
|
||||
def df(p):
|
||||
return kernel.gradients_Z_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, p, self.qX)
|
||||
|
||||
from GPy.models import GradientChecker
|
||||
m = GradientChecker(f, df, self.Z.copy())
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
def _test_qX(self, kernel, psi2n=False):
|
||||
|
||||
def f(p):
|
||||
self.qX.param_array[:] = p
|
||||
self.qX._trigger_params_changed()
|
||||
psi0 = kernel.psi0(self.Z, self.qX)
|
||||
psi1 = kernel.psi1(self.Z, self.qX)
|
||||
if not psi2n:
|
||||
psi2 = kernel.psi2(self.Z, self.qX)
|
||||
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
|
||||
else:
|
||||
psi2 = kernel.psi2n(self.Z, self.qX)
|
||||
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
|
||||
|
||||
def df(p):
|
||||
self.qX.param_array[:] = p
|
||||
self.qX._trigger_params_changed()
|
||||
grad = kernel.gradients_qX_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, self.Z, self.qX)
|
||||
self.qX.set_gradients(grad)
|
||||
return self.qX.gradient.copy()
|
||||
|
||||
from GPy.models import GradientChecker
|
||||
m = GradientChecker(f, df, self.qX.param_array.copy())
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Running unit tests, please be (very) patient...")
|
||||
|
|
|
|||
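Kernel_Psi_statistics_GradientTests above wraps each expectation in GPy.models.GradientChecker, which compares an analytic gradient against finite differences via checkgrad(). A standalone sketch of that pattern on a toy quadratic objective (the objective itself is just an illustration):

import numpy as np
from GPy.models import GradientChecker

def f(x):
    return float(np.sum(x ** 2))   # scalar objective

def df(x):
    return 2 * x                   # analytic gradient of f

m = GradientChecker(f, df, np.random.randn(5))
assert m.checkgrad(verbose=1)      # finite differences vs. the analytic gradient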
|
|
@ -7,10 +7,8 @@ from GPy.models import GradientChecker
|
|||
import functools
|
||||
import inspect
|
||||
from GPy.likelihoods import link_functions
|
||||
from GPy.core.parameterization import Param
|
||||
from functools import partial
|
||||
#np.random.seed(300)
|
||||
#np.random.seed(4)
|
||||
fixed_seed = 7
|
||||
|
||||
#np.seterr(divide='raise')
|
||||
def dparam_partial(inst_func, *args):
|
||||
|
|
@ -105,6 +103,7 @@ class TestNoiseModels(object):
|
|||
Generic model checker
|
||||
"""
|
||||
def setUp(self):
|
||||
np.random.seed(fixed_seed)
|
||||
self.N = 15
|
||||
self.D = 3
|
||||
self.X = np.random.rand(self.N, self.D)*10
|
||||
|
|
@ -218,7 +217,8 @@ class TestNoiseModels(object):
|
|||
"constraints": [(".*variance", self.constrain_positive)]
|
||||
},
|
||||
"laplace": True,
|
||||
"ep": False # FIXME: Should be True when we have it working again
|
||||
"ep": False, # FIXME: Should be True when we have it working again
|
||||
"variational_expectations": True,
|
||||
},
|
||||
"Gaussian_log": {
|
||||
"model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
|
||||
|
|
@ -227,7 +227,8 @@ class TestNoiseModels(object):
|
|||
"vals": [self.var],
|
||||
"constraints": [(".*variance", self.constrain_positive)]
|
||||
},
|
||||
"laplace": True
|
||||
"laplace": True,
|
||||
"variational_expectations": True
|
||||
},
|
||||
#"Gaussian_probit": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
|
||||
|
|
@ -252,7 +253,8 @@ class TestNoiseModels(object):
|
|||
"link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
|
||||
"laplace": True,
|
||||
"Y": self.binary_Y,
|
||||
"ep": False # FIXME: Should be True when we have it working again
|
||||
"ep": False, # FIXME: Should be True when we have it working again
|
||||
"variational_expectations": True
|
||||
},
|
||||
"Exponential_default": {
|
||||
"model": GPy.likelihoods.Exponential(),
|
||||
|
|
@ -347,6 +349,10 @@ class TestNoiseModels(object):
|
|||
ep = attributes["ep"]
|
||||
else:
|
||||
ep = False
|
||||
if "variational_expectations" in attributes:
|
||||
var_exp = attributes["variational_expectations"]
|
||||
else:
|
||||
var_exp = False
|
||||
|
||||
#if len(param_vals) > 1:
|
||||
#raise NotImplementedError("Cannot support multiple params in likelihood yet!")
|
||||
|
|
@ -377,6 +383,11 @@ class TestNoiseModels(object):
|
|||
if ep:
|
||||
#ep likelihood gradcheck
|
||||
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
|
||||
if var_exp:
|
||||
#Need to specify mu and var!
|
||||
yield self.t_varexp, model, Y, Y_metadata
|
||||
yield self.t_dexp_dmu, model, Y, Y_metadata
|
||||
yield self.t_dexp_dvar, model, Y, Y_metadata
|
||||
|
||||
|
||||
self.tearDown()
|
||||
|
|
@ -603,6 +614,87 @@ class TestNoiseModels(object):
|
|||
print(m)
|
||||
assert m.checkgrad(verbose=1, step=step)
|
||||
|
||||
################
|
||||
# variational expectations #
|
||||
################
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_varexp(self, model, Y, Y_metadata):
|
||||
#Test that the analytic implementation (if it exists) matches the generic gauss
|
||||
#hermite implementation
|
||||
print("\n{}".format(inspect.stack()[0][3]))
|
||||
#Make mu and var (marginal means and variances of q(f)) draws from a GP
|
||||
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
|
||||
L = GPy.util.linalg.jitchol(k)
|
||||
mu = L.dot(np.random.randn(*Y.shape))
|
||||
#Variance must be positive
|
||||
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
|
||||
|
||||
expectation = model.variational_expectations(Y=Y, m=mu, v=var, gh_points=None, Y_metadata=Y_metadata)[0]
|
||||
|
||||
#Implementation of gauss hermite integration
|
||||
shape = mu.shape
|
||||
gh_x, gh_w= np.polynomial.hermite.hermgauss(50)
|
||||
m,v,Y = mu.flatten(), var.flatten(), Y.flatten()
|
||||
#make a grid of points
|
||||
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
|
||||
#evaluate the likelihood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
|
||||
# broadcast needs to be handled carefully.
|
||||
logp = model.logpdf(X, Y[:,None], Y_metadata=Y_metadata)
|
||||
#average over the grid to get derivatives of the Gaussian's parameters
#division by sqrt(pi) comes from the fact that each quadrature weight needs to be scaled by 1/sqrt(pi)
|
||||
expectation_gh = np.dot(logp, gh_w)/np.sqrt(np.pi)
|
||||
expectation_gh = expectation_gh.reshape(*shape)
|
||||
|
||||
np.testing.assert_almost_equal(expectation, expectation_gh, decimal=5)
|
||||
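# For reference, the identity t_varexp checks above (a standard Gauss-Hermite substitution;
# q(f_n) = N(m_n, v_n), nodes gh_x and weights gh_w from np.polynomial.hermite.hermgauss):
#   E_{q(f_n)}[ log p(y_n | f_n) ]
#     = \int log p(y_n | f_n) N(f_n | m_n, v_n) df_n
#     ~= (1 / sqrt(pi)) * sum_i gh_w[i] * log p(y_n | m_n + sqrt(2 * v_n) * gh_x[i])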
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dexp_dmu(self, model, Y, Y_metadata):
|
||||
print("\n{}".format(inspect.stack()[0][3]))
|
||||
#Make mu and var (marginal means and variances of q(f)) draws from a GP
|
||||
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
|
||||
L = GPy.util.linalg.jitchol(k)
|
||||
mu = L.dot(np.random.randn(*Y.shape))
|
||||
#Variance must be positive
|
||||
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
|
||||
expectation = functools.partial(model.variational_expectations, Y=Y, v=var, gh_points=None, Y_metadata=Y_metadata)
|
||||
|
||||
#Function to get the nth returned value
|
||||
def F(mu):
|
||||
return expectation(m=mu)[0]
|
||||
def dmu(mu):
|
||||
return expectation(m=mu)[1]
|
||||
|
||||
grad = GradientChecker(F, dmu, mu.copy(), 'm')
|
||||
|
||||
grad.randomize()
|
||||
print(grad)
|
||||
print(model)
|
||||
assert grad.checkgrad(verbose=1)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dexp_dvar(self, model, Y, Y_metadata):
|
||||
print("\n{}".format(inspect.stack()[0][3]))
|
||||
#Make mu and var (marginal means and variances of q(f)) draws from a GP
|
||||
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
|
||||
L = GPy.util.linalg.jitchol(k)
|
||||
mu = L.dot(np.random.randn(*Y.shape))
|
||||
#Variance must be positive
|
||||
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
|
||||
expectation = functools.partial(model.variational_expectations, Y=Y, m=mu, gh_points=None, Y_metadata=Y_metadata)
|
||||
|
||||
#Function to get the nth returned value
|
||||
def F(var):
|
||||
return expectation(v=var)[0]
|
||||
def dvar(var):
|
||||
return expectation(v=var)[2]
|
||||
|
||||
grad = GradientChecker(F, dvar, var.copy(), 'v')
|
||||
|
||||
self.constrain_positive('v', grad)
|
||||
#grad.randomize()
|
||||
print(grad)
|
||||
print(model)
|
||||
assert grad.checkgrad(verbose=1)
|
||||
|
||||
class LaplaceTests(unittest.TestCase):
|
||||
"""
|
||||
|
|
@ -610,6 +702,7 @@ class LaplaceTests(unittest.TestCase):
|
|||
"""
|
||||
|
||||
def setUp(self):
|
||||
np.random.seed(fixed_seed)
|
||||
self.N = 15
|
||||
self.D = 1
|
||||
self.X = np.random.rand(self.N, self.D)*10
|
||||
|
|
@ -705,7 +798,7 @@ class LaplaceTests(unittest.TestCase):
|
|||
post_mean_approx, post_var_approx, = m2.predict(X)
|
||||
|
||||
if debug:
|
||||
import pylab as pb
|
||||
from matplotlib import pyplot as pb
|
||||
pb.figure(5)
|
||||
pb.title('posterior means')
|
||||
pb.scatter(X, post_mean, c='g')
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
import numpy as np
import scipy as sp
from GPy.util.linalg import jitchol
import GPy
from ..util.linalg import jitchol,trace_dot, ijk_jlk_to_il, ijk_ljk_to_ilk

class LinalgTests(np.testing.TestCase):
def setUp(self):

@ -37,18 +36,19 @@ class LinalgTests(np.testing.TestCase):
except sp.linalg.LinAlgError:
return True

def test_einsum_ijk_jlk_to_il(self):
A = np.random.randn(50, 150, 5)
B = np.random.randn(150, 100, 5)
pure = np.einsum('ijk,jlk->il', A, B)
quick = GPy.util.linalg.ijk_jlk_to_il(A, B)
np.testing.assert_allclose(pure, quick)

def test_trace_dot(self):
N = 5
A = np.random.rand(N,N)
B = np.random.rand(N,N)
trace = np.trace(A.dot(B))
test_trace = trace_dot(A,B)
np.testing.assert_allclose(trace,test_trace,atol=1e-13)

def test_einsum_ij_jlk_to_ilk(self):
A = np.random.randn(15, 150, 5)
B = np.random.randn(150, 50, 5)
pure = np.einsum('ijk,jlk->il', A, B)
quick = GPy.util.linalg.ijk_jlk_to_il(A,B)
quick = ijk_jlk_to_il(A,B)
np.testing.assert_allclose(pure, quick)

def test_einsum_ijk_ljk_to_ilk(self):

@ -56,5 +56,5 @@ class LinalgTests(np.testing.TestCase):
B = np.random.randn(150, 20, 5)
#B = A.copy()
pure = np.einsum('ijk,ljk->ilk', A, B)
quick = GPy.util.linalg.ijk_ljk_to_ilk(A,B)
quick = ijk_ljk_to_ilk(A,B)
np.testing.assert_allclose(pure, quick)
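For reference, the contractions these helpers implement are plain einsum reductions over the trailing axis; a small NumPy sketch (independent of GPy's optimized routines) spelling them out as per-slice matrix products:

import numpy as np

A = np.random.randn(4, 6, 3)   # indices i, j, k
B = np.random.randn(6, 5, 3)   # indices j, l, k
# 'ijk,jlk->il': sum the slice products A[:, :, k] @ B[:, :, k] over k
np.testing.assert_allclose(np.einsum('ijk,jlk->il', A, B),
                           sum(A[:, :, k].dot(B[:, :, k]) for k in range(3)))

C = np.random.randn(5, 6, 3)   # indices l, j, k
# 'ijk,ljk->ilk': for each k, A[:, :, k] @ C[:, :, k].T, keeping the k axis
np.testing.assert_allclose(np.einsum('ijk,ljk->ilk', A, C),
                           np.stack([A[:, :, k].dot(C[:, :, k].T) for k in range(3)], axis=2))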
@ -1,5 +1,5 @@
import numpy as np
import scipy as sp
import scipy
from scipy.special import cbrt
from GPy.models import GradientChecker
_lim_val = np.finfo(np.float64).max

@ -79,8 +79,7 @@ class LinkFunctionTests(np.testing.TestCase):
assert np.isinf(np.exp(np.log(self.f_upper_lim)))
#Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
self.assertTrue(np.isfinite(link.transf(self.f_upper_lim)))
self.check_overflow(link, lim_of_inf)

#Check that it would otherwise fail

@ -93,18 +92,18 @@ class LinkFunctionTests(np.testing.TestCase):
link = Log_ex_1()
lim_of_inf = _lim_val_exp

np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
assert np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim))))
np.testing.assert_almost_equal(scipy.special.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
assert np.isinf(scipy.special.log1p(np.exp(np.log(self.f_upper_lim))))
#Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5)
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), scipy.special.log1p(_lim_val), significant=5)
self.check_overflow(link, lim_of_inf)

#Check that it would otherwise fail
beyond_lim_of_inf = lim_of_inf + 10.0
old_err_state = np.seterr(over='ignore')
self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf))))
self.assertTrue(np.isinf(scipy.special.log1p(np.exp(beyond_lim_of_inf))))
np.seterr(**old_err_state)
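The clipping behaviour exercised above can be sketched outside GPy as well: log1p(exp(f)) becomes inf once exp(f) overflows, so the argument is clipped at the log of the largest representable float first. An illustrative stand-alone version (not GPy's actual implementation):

import numpy as np

_lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val)      # largest f for which exp(f) is still finite

def safe_softplus(f):
    # log(1 + exp(f)) with the argument clipped so the intermediate exp never overflows
    return np.log1p(np.exp(np.minimum(f, _lim_val_exp)))

old_err_state = np.seterr(over='ignore')
assert np.isinf(np.log1p(np.exp(_lim_val_exp + 10.0)))   # naive version overflows
np.seterr(**old_err_state)
assert np.isfinite(safe_softplus(_lim_val_exp + 10.0))    # clipped version stays finite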
@ -1,6 +1,8 @@
from __future__ import print_function
import numpy as np
import scipy as sp
import GPy
import warnings

class MiscTests(np.testing.TestCase):
"""

@ -11,8 +13,15 @@ class MiscTests(np.testing.TestCase):
self._lim_val_exp = np.log(self._lim_val)

def test_safe_exp_upper(self):
assert np.exp(self._lim_val_exp + 1) == np.inf
assert GPy.util.misc.safe_exp(self._lim_val_exp + 1) < np.inf
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always') # always print
assert np.isfinite(np.exp(self._lim_val_exp))
assert np.isinf(np.exp(self._lim_val_exp + 1))
assert np.isfinite(GPy.util.misc.safe_exp(self._lim_val_exp + 1))

print(w)
print(len(w))
assert len(w)<=1 # should have one overflow warning

def test_safe_exp_lower(self):
assert GPy.util.misc.safe_exp(1e-10) < np.inf
@ -15,6 +15,13 @@ class MiscTests(unittest.TestCase):
self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))

def test_setXY(self):
m = GPy.models.GPRegression(self.X, self.Y)
m.set_XY(np.vstack([self.X, np.random.rand(1,self.X.shape[1])]), np.vstack([self.Y, np.random.rand(1,self.Y.shape[1])]))
m._trigger_params_changed()
self.assertTrue(m.checkgrad())
m.predict(m.X)

def test_raw_predict(self):
k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)

@ -36,12 +43,78 @@ class MiscTests(unittest.TestCase):
np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
np.testing.assert_almost_equal(mu_hat, mu)

def test_normalizer(self):
k = GPy.kern.RBF(1)
Y = self.Y
mu, std = Y.mean(0), Y.std(0)
m = GPy.models.GPRegression(self.X, Y, kernel=k, normalizer=True)
m.optimize()
assert(m.checkgrad())
k = GPy.kern.RBF(1)
m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
m2[:] = m[:]
mu1, var1 = m.predict(m.X, full_cov=True)
mu2, var2 = m2.predict(m2.X, full_cov=True)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
mu1, var1 = m.predict(m.X, full_cov=False)
mu2, var2 = m2.predict(m2.X, full_cov=False)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)

q50n = m.predict_quantiles(m.X, (50,))
q50 = m2.predict_quantiles(m2.X, (50,))
np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
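The identity test_normalizer relies on can be stated compactly: a model built with normalizer=True should match one fitted to manually standardized targets once its predictive mean is mapped back through mean*std + mu. A smaller sketch of that check (same idea as the test above, illustrative data):

import numpy as np
import GPy

np.random.seed(0)
X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
mu, std = Y.mean(0), Y.std(0)

m_norm = GPy.models.GPRegression(X, Y, normalizer=True)              # standardizes internally
m_raw = GPy.models.GPRegression(X, (Y - mu) / std, normalizer=False)
m_raw[:] = m_norm[:]                                                  # share the hyperparameters

mean_n, _ = m_norm.predict(X)
mean_r, _ = m_raw.predict(X)
np.testing.assert_allclose(mean_n, mean_r * std + mu)                 # un-normalized mean agrees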
def check_jacobian(self):
try:
import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt
from GPy.models import GradientChecker, GPRegression
except:
raise self.skipTest("autograd not available to check gradients")
def k(X, X2, alpha=1., lengthscale=None):
if lengthscale is None:
lengthscale = np.ones(X.shape[1])
exp = 0.
for q in range(X.shape[1]):
exp += ((X[:, [q]] - X2[:, [q]].T)/lengthscale[q])**2
#exp = np.sqrt(exp)
return alpha * np.exp(-.5*exp)
dk = ag.elementwise_grad(lambda x, x2: k(x, x2, alpha=ke.variance.values, lengthscale=ke.lengthscale.values))
dkdk = ag.elementwise_grad(dk, argnum=1)

ke = GPy.kern.RBF(1, ARD=True)
#ke.randomize()
ke.variance = .2#.randomize()
ke.lengthscale[:] = .5
ke.randomize()
X = np.linspace(-1, 1, 1000)[:,None]
X2 = np.array([[0.]]).T
np.testing.assert_allclose(ke.gradients_X([[1.]], X, X), dk(X, X))
np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X).sum(0), dkdk(X, X))
np.testing.assert_allclose(ke.gradients_X([[1.]], X, X2), dk(X, X2))
np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X2).sum(0), dkdk(X, X2))

m = GPRegression(self.X, self.Y)
def f(x):
m.X[:] = x
return m.log_likelihood()
def df(x):
m.X[:] = x
return m.kern.gradients_X(m.grad_dict['dL_dK'], X)
def ddf(x):
m.X[:] = x
return m.kern.gradients_XX(m.grad_dict['dL_dK'], X).sum(0)
gc = GradientChecker(f, df, self.X)
gc2 = GradientChecker(df, ddf, self.X)
assert(gc.checkgrad())
assert(gc2.checkgrad())
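check_jacobian leans on autograd's elementwise_grad to build a reference gradient for the kernel; the pattern in isolation, which only runs if autograd is installed, looks roughly like this:

import autograd.numpy as anp
import autograd as ag

# elementwise_grad differentiates a function applied elementwise: d/dx sin(x) = cos(x)
df = ag.elementwise_grad(lambda x: anp.sin(x))
x = anp.linspace(-1., 1., 11)
assert anp.allclose(df(x), anp.cos(x))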
def test_sparse_raw_predict(self):
k = GPy.kern.RBF(1)
m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k)
m.randomize()
Z = m.Z[:]
X = self.X[:]

# Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
Kinv = m.posterior.woodbury_inv

@ -127,11 +200,24 @@ class MiscTests(unittest.TestCase):
m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
mul, varl = m.predict(m.X)

k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
m2 = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
m2.kern.rbf.lengthscale[:] = 1e6
m2.X[:] = m.X.param_array
m2.likelihood[:] = m.likelihood[:]
m2.kern.white[:] = m.kern.white[:]
mu, var = m.predict(m.X)
np.testing.assert_allclose(mul, mu)
np.testing.assert_allclose(varl, var)

q50 = m.predict_quantiles(m.X, (50,))
np.testing.assert_allclose(mul, q50[0])

def test_likelihood_replicate_kern(self):
m = GPy.models.GPRegression(self.X, self.Y)
@ -410,8 +496,8 @@ class GradientTests(np.testing.TestCase):
self.check_model(rbf, model_type='SparseGPRegression', dimension=2)

def test_SparseGPRegression_rbf_linear_white_kern_1D(self):
''' Testing the sparse GP regression with rbf kernel on 2d data '''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
''' Testing the sparse GP regression with rbf kernel on 1d data '''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1) + GPy.kern.White(1, 1e-5)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=1)

def test_SparseGPRegression_rbf_linear_white_kern_2D(self):

@ -419,14 +505,12 @@ class GradientTests(np.testing.TestCase):
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)

# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)

# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)

@ -443,6 +527,16 @@ class GradientTests(np.testing.TestCase):
m = GPy.models.GPLVM(Y, input_dim, kernel=k)
self.assertTrue(m.checkgrad())

def test_BCGPLVM_rbf_bias_white_kern_2D(self):
""" Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.Bias(input_dim, 0.1) + GPy.kern.White(input_dim, 0.05)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.BCGPLVM(Y, input_dim, kernel=k)
self.assertTrue(m.checkgrad())

def test_GPLVM_rbf_linear_white_kern_2D(self):
""" Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2

@ -468,23 +562,8 @@ class GradientTests(np.testing.TestCase):
Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.RBF(1)
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z)
# distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
# likelihood = GPy.likelihoods.EP(Y, distribution)
# m = GPy.core.SparseGP(X, likelihood, kernel, Z)
# m.ensure_default_constraints()
self.assertTrue(m.checkgrad())

@unittest.expectedFailure
def test_generalized_FITC(self):
N = 20
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.RBF(1) + GPy.kern.White(1)
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
m = GPy.models.FITCClassification(X, Y, kernel=k)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())

@unittest.expectedFailure
def test_multioutput_regression_1D(self):
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5

@ -494,12 +573,11 @@ class GradientTests(np.testing.TestCase):
Y = np.vstack((Y1, Y2))

k1 = GPy.kern.RBF(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
import ipdb;ipdb.set_trace()
m.constrain_fixed('.*rbf_var', 1.)
m = GPy.models.GPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel=k1)
#import ipdb;ipdb.set_trace()
#m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad())

@unittest.expectedFailure
def test_multioutput_sparse_regression_1D(self):
X1 = np.random.rand(500, 1) * 8
X2 = np.random.rand(300, 1) * 5

@ -509,8 +587,7 @@ class GradientTests(np.testing.TestCase):
Y = np.vstack((Y1, Y2))

k1 = GPy.kern.RBF(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel=k1)
self.assertTrue(m.checkgrad())

def test_gp_heteroscedastic_regression(self):

@ -539,6 +616,7 @@ class GradientTests(np.testing.TestCase):
self.assertTrue(m.checkgrad())

def test_gp_kronecker_gaussian(self):
np.random.seed(0)
N1, N2 = 30, 20
X1 = np.random.randn(N1, 1)
X2 = np.random.randn(N2, 1)

@ -559,16 +637,16 @@ class GradientTests(np.testing.TestCase):

m.randomize()
mm[:] = m[:]
assert np.allclose(m.log_likelihood(), mm.log_likelihood())
assert np.allclose(m.gradient, mm.gradient)
self.assertTrue(np.allclose(m.log_likelihood(), mm.log_likelihood()))
self.assertTrue(np.allclose(m.gradient, mm.gradient))
X1test = np.random.randn(100, 1)
X2test = np.random.randn(100, 1)
mean1, var1 = m.predict(X1test, X2test)
yy, xx = np.meshgrid(X2test, X1test)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
mean2, var2 = mm.predict(Xgrid)
assert np.allclose(mean1, mean2)
assert np.allclose(var1, var2)
self.assertTrue( np.allclose(mean1, mean2) )
self.assertTrue( np.allclose(var1, var2) )

def test_gp_VGPC(self):
num_obs = 25

@ -576,7 +654,8 @@ class GradientTests(np.testing.TestCase):
X = X[:, None]
Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
lik = GPy.likelihoods.Gaussian()
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
self.assertTrue(m.checkgrad())
@ -248,10 +248,16 @@ class ParameterizedTest(unittest.TestCase):
m.randomize()
self.assertEqual(m.p1, val)

def test_checkgrad(self):
assert(self.testmodel.kern.checkgrad())
assert(self.testmodel.kern.lengthscale.checkgrad())
assert(self.testmodel.likelihood.checkgrad())

def test_printing(self):
print(self.test1)
print(self.param)
print(self.test1[''])
print(self.testmodel.hierarchy_name(False))

if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_add_parameter']

@ -20,6 +20,8 @@ from GPy.examples.dimensionality_reduction import mrd_simulation
from GPy.core.parameterization.variational import NormalPosterior
from GPy.models.gp_regression import GPRegression
from functools import reduce
from GPy.util.caching import Cacher
from pickle import PicklingError

def toy_model():
X = np.linspace(0,1,50)[:, None]

@ -205,23 +207,6 @@ class Test(ListDictTestCase):
def _callback(self, what, which):
what.count += 1

@unittest.skip
def test_add_observer(self):
par = toy_model()
par.name = "original"
par.count = 0
par.add_observer(self, self._callback, 1)
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
self.assertNotIn(par.observers[0], pcopy.observers)
pcopy = par.copy()
pcopy.name = "copy"
self.assertTrue(par.checkgrad())
self.assertTrue(pcopy.checkgrad())
self.assertTrue(pcopy.kern.checkgrad())
import ipdb;ipdb.set_trace()
self.assertIn(par.observers[0], pcopy.observers)
self.assertEqual(par.count, 3)
self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad

if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
1 GPy/testing/run_coverage.sh Executable file

@ -0,0 +1 @@
nosetests . --with-coverage --logging-level=INFO --cover-html --cover-html-dir=coverage --cover-package=GPy --cover-erase
101 GPy/testing/rv_transformation_tests.py Normal file

@ -0,0 +1,101 @@
# Written by Ilias Bilionis
"""
Test if hyperparameters in models are properly transformed.
"""


import unittest
import numpy as np
import scipy.stats as st
import GPy


class TestModel(GPy.core.Model):
"""
A simple GPy model with one parameter.
"""
def __init__(self):
GPy.core.Model.__init__(self, 'test_model')
theta = GPy.core.Param('theta', 1.)
self.link_parameter(theta)

def log_likelihood(self):
return 0.


class RVTransformationTestCase(unittest.TestCase):

def _test_trans(self, trans):
m = TestModel()
prior = GPy.priors.LogGaussian(.5, 0.1)
m.theta.set_prior(prior)
m.theta.unconstrain()
m.theta.constrain(trans)
# The PDF of the transformed variables
p_phi = lambda phi : np.exp(-m._objective_grads(phi)[0])
# To the empirical PDF of:
theta_s = prior.rvs(100000)
phi_s = trans.finv(theta_s)
# which is essentially a kernel density estimation
kde = st.gaussian_kde(phi_s)
# We will compare the PDF here:
phi = np.linspace(phi_s.min(), phi_s.max(), 100)
# The transformed PDF of phi should be this:
pdf_phi = np.array([p_phi(p) for p in phi])
# UNCOMMENT TO SEE GRAPHICAL COMPARISON
#import matplotlib.pyplot as plt
#fig, ax = plt.subplots()
#ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
#ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
#ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
#ax.set_xlabel(r'transformed $\theta$', fontsize=16)
#ax.set_ylabel('PDF', fontsize=16)
#plt.legend(loc='best')
#plt.show(block=True)
# END OF PLOT
# The following test cannot be very accurate
self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1)
# Check the gradients at a few random points
for i in range(10):
m.theta = theta_s[i]
self.assertTrue(m.checkgrad(verbose=True))

def test_Logexp(self):
self._test_trans(GPy.constraints.Logexp())
self._test_trans(GPy.constraints.Exponent())


if __name__ == '__main__':
unittest.main()
quit()
m = TestModel()
prior = GPy.priors.LogGaussian(0., .9)
m.theta.set_prior(prior)

# The following should return the PDF in terms of the transformed quantities
p_phi = lambda phi : np.exp(-m._objective_grads(phi)[0])

# Let's look at the transformation phi = log(exp(theta - 1))
trans = GPy.constraints.Exponent()
m.theta.constrain(trans)
# Plot the transformed probability density
phi = np.linspace(-8, 8, 100)
fig, ax = plt.subplots()
# Let's draw some samples of theta and transform them so that we see
# which one is right
theta_s = prior.rvs(10000)
# Transform it to the new variables
phi_s = trans.finv(theta_s)
# And draw their histogram
ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical')
# This is to be compared to the PDF of the model expressed in terms of these new
# variables
ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2)
ax.set_xlim(-3, 10)
ax.set_xlabel(r'transformed $\theta$', fontsize=16)
ax.set_ylabel('PDF', fontsize=16)
plt.legend(loc='best')
# Now let's test the gradients
m.checkgrad(verbose=True)
# And show the plot
plt.show(block=True)
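What the new test checks is the standard change-of-variables identity: if theta = t(phi) has prior density p_theta, the density seen in the transformed space is p_phi(phi) = p_theta(t(phi)) * |dt/dphi|. For a transformation like Exponent (theta = exp(phi)) this can be verified against samples with SciPy alone; a rough sketch with an illustrative lognormal prior (its parameters are not meant to reproduce GPy's LogGaussian parameterization):

import numpy as np
import scipy.stats as st

prior = st.lognorm(s=0.1, scale=np.exp(0.5))      # log(theta) ~ N(0.5, 0.1^2)

theta_s = prior.rvs(size=100000, random_state=0)
phi_s = np.log(theta_s)                            # phi = log(theta), i.e. theta = exp(phi)

phi = np.linspace(phi_s.min(), phi_s.max(), 100)
pdf_phi = prior.pdf(np.exp(phi)) * np.exp(phi)     # p_phi(phi) = p_theta(exp(phi)) * |d exp(phi)/d phi|

kde = st.gaussian_kde(phi_s)                       # empirical density of the transformed samples
rel_err = np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi))
assert rel_err <= 1e-1                             # same loose tolerance as the unit test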
@ -15,5 +15,5 @@ from . import caching
from . import diag
from . import initialization
from . import multioutput
from . import linalg_gpu
from . import parallel
Some files were not shown because too many files have changed in this diff