merging last master

This commit is contained in:
beckdaniel 2015-09-17 14:43:00 +01:00
commit 1a02c65a61
133 changed files with 13282 additions and 9562 deletions

View file

@ -17,7 +17,7 @@ before_install:
- sudo ln -s /run/shm /dev/shm
install:
- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.9 scipy=0.16 matplotlib nose sphinx pip nose
#- pip install .
- python setup.py build_ext --inplace
#--use-mirrors

View file

@ -1,7 +1 @@
James Hensman
Nicolo Fusi
Ricardo Andrade
Nicolas Durrande
Alan Saul
Max Zwiessele
Neil D. Lawrence
See contributors.

View file

@ -21,16 +21,18 @@ from . import plotting
from .core import Model
from .core.parameterization import Param, Parameterized, ObsAr
from .__version__ import __version__
#@nottest
try:
#Get rid of nose dependency by only ignoring if you have nose installed
from nose.tools import nottest
@nottest
def tests():
Tester(testing).test(verbose=10)
def tests(verbose=10):
Tester(testing).test(verbose=verbose)
except:
def tests():
Tester(testing).test(verbose=10)
def tests(verbose=10):
Tester(testing).test(verbose=verbose)
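The verbosity is now configurable; a minimal usage sketch (assuming numpy's Tester and the bundled GPy.testing module are available, as above):

import GPy
GPy.tests(verbose=1)  # run the bundled test suite through numpy's Tester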
def load(file_path):
"""

GPy/__version__.py Normal file
View file

@ -0,0 +1 @@
__version__ = "0.8.8"

View file

@ -7,6 +7,6 @@ from .parameterization.param import Param, ParamConcatenation
from .parameterization.observable_array import ObsAr
from .gp import GP
#from .svgp import SVGP
from .svgp import SVGP
from .sparse_gp import SparseGP
from .mapping import *

View file

@ -60,9 +60,11 @@ class GP(Model):
self.normalizer.scale_by(Y)
self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
self.Y = Y
else:
elif isinstance(Y, np.ndarray):
self.Y = ObsAr(Y)
self.Y_normalized = self.Y
else:
self.Y = Y
if Y.shape[0] != self.num_data:
#There can be cases where we want more inputs than outputs, for example if we have multiple latent
@ -104,8 +106,23 @@ class GP(Model):
self.link_parameter(self.likelihood)
self.posterior = None
# The predictive variable to be used to predict using the posterior object's
# woodbury_vector and woodbury_inv is defined as predictive_variable
# as long as the posterior has the right woodbury entries.
# It is the input variable used for the covariance between
# X_star and the posterior of the GP.
# This is usually just a link to self.X (full GP) or self.Z (sparse GP).
# Make sure to name this variable and the predict functions will "just work"
# In maths the predictive variable is:
# K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
# W_{pp} := \texttt{Woodbury inv}
# p := _predictive_variable
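In the comment's notation (p the predictive variable, w the woodbury_vector and W_{pp}^{-1} the stored woodbury_inv), the prediction the code below computes is, as a sketch,

\mu_* = K_{*p}\, w, \qquad \Sigma_* = K_{**} - K_{*p}\, W_{pp}^{-1}\, K_{p*},

with p = X for a full GP and p = Z for a sparse GP.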
def set_XY(self, X=None, Y=None, trigger_update=True):
@property
def _predictive_variable(self):
return self.X
def set_XY(self, X=None, Y=None):
"""
Set the input / output data of the model
This is useful if we wish to change our existing data but maintain the same model
@ -115,7 +132,7 @@ class GP(Model):
:param Y: output observations
:type Y: np.ndarray
"""
if trigger_update: self.update_model(False)
self.update_model(False)
if Y is not None:
if self.normalizer is not None:
self.normalizer.scale_by(Y)
@ -131,34 +148,33 @@ class GP(Model):
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
self.unlink_parameter(self.X)
self.X = X
self.link_parameters(self.X)
self.link_parameter(self.X)
else:
self.unlink_parameter(self.X)
from ..core import Param
self.X = Param('latent mean',X)
self.link_parameters(self.X)
self.link_parameter(self.X)
else:
self.X = ObsAr(X)
if trigger_update: self.update_model(True)
if trigger_update: self._trigger_params_changed()
self.update_model(True)
def set_X(self,X, trigger_update=True):
def set_X(self,X):
"""
Set the input data of the model
:param X: input observations
:type X: np.ndarray
"""
self.set_XY(X=X, trigger_update=trigger_update)
self.set_XY(X=X)
def set_Y(self,Y, trigger_update=True):
def set_Y(self,Y):
"""
Set the output data of the model
:param X: output observations
:type X: np.ndarray
"""
self.set_XY(Y=Y, trigger_update=trigger_update)
self.set_XY(Y=Y)
def parameters_changed(self):
"""
@ -181,7 +197,7 @@ class GP(Model):
"""
return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, full_cov=False, kern=None):
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
For making predictions, does not account for normalization or likelihood
@ -197,24 +213,33 @@ class GP(Model):
if kern is None:
kern = self.kern
Kx = kern.K(_Xnew, self.X).T
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
Kx = kern.K(self._predictive_variable, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if len(mu.shape)==1:
mu = mu.reshape(-1,1)
if full_cov:
Kxx = kern.K(_Xnew)
var = Kxx - np.dot(Kx.T, WiKx)
Kxx = kern.K(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
from ..util.linalg import mdot
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(_Xnew)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
var[var<0.] = 0.
Kxx = kern.Kdiag(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]
#add the mean function in
if not self.mean_function is None:
mu += self.mean_function.f(_Xnew)
return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@ -247,7 +272,7 @@ class GP(Model):
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
return mean, var
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None):
"""
Get the predictive quantiles around the prediction at X
@ -255,10 +280,12 @@ class GP(Model):
:type X: np.ndarray (Xnew x self.input_dim)
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
:type quantiles: tuple
:param kern: optional kernel to use for prediction
:type predict_kw: dict
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
m, v = self._raw_predict(X, full_cov=False, kern=kern)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
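A usage sketch of the new kern argument to predict and predict_quantiles; the toy data, the model and the sub-kernel attribute access are illustrative assumptions, not part of this diff:

import numpy as np
import GPy

X = np.random.rand(50, 1)
Y = np.sin(6 * X) + 0.1 * np.random.randn(50, 1)
k = GPy.kern.RBF(1) + GPy.kern.Bias(1)
m = GPy.models.GPRegression(X, Y, k)
m.optimize()

Xnew = np.linspace(0, 1, 100)[:, None]
mean, var = m.predict(Xnew)                                # full (sum) kernel
lo, hi = m.predict_quantiles(Xnew, quantiles=(2.5, 97.5))  # 95% interval
# predict using only one part of the sum kernel (part names follow GPy's naming convention):
mean_rbf, var_rbf = m.predict(Xnew, kern=m.kern.rbf)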
@ -292,6 +319,120 @@ class GP(Model):
return dmu_dX, dv_dX
def predict_jacobian(self, Xnew, kern=None, full_cov=True):
"""
Compute the derivatives of the posterior of the GP.
Given a set of points at which to predict X* (size [N*,Q]), compute the
mean and variance of the derivative. Resulting arrays are sized:
dmu_dX* -- [N*, Q, D], where D is the number of outputs of this GP (usually one).
Note that this is the mean and variance of the derivative,
not the derivative of the mean and variance! (See predictive_gradients for that.)
dv_dX* -- [N*, Q], (since all outputs have the same variance)
Missing data is not handled yet; once it is, there will be
one output variance per output dimension.
:param X: The points at which to get the predictive gradients.
:type X: np.ndarray (Xnew x self.input_dim)
:param kern: The kernel to compute the jacobian for.
:param boolean full_cov: whether to return the full covariance of the jacobian.
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
Note: We always return the sum over the input_dim gradients, as the off-diagonals
in the input dimension are not needed for further calculations.
This is a compromise made for speed. Mathematically the jacobian would
have another dimension in Q.
"""
if kern is None:
kern = self.kern
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
for i in range(self._predictive_variable.shape[0]):
dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])
if full_cov:
dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
else:
dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
def compute_cov_inner(wi):
if full_cov:
# full covariance gradients:
var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
else:
var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
return var_jac
if self.posterior.woodbury_inv.ndim == 3: # Missing data:
if full_cov:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = compute_cov_inner(self.posterior.woodbury_inv)
return mean_jac, var_jac
def predict_wishard_embedding(self, Xnew, kern=None, mean=True, covariance=True):
"""
Predict the Wishart embedding G of the GP. This is the density of the
input of the GP defined by the probabilistic function mapping f.
G = J_mean.T*J_mean + output_dim*J_cov.
:param array-like Xnew: The points at which to evaluate the magnification.
:param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification.
Supplying only a part of the learned kernel gives insight into the density
contributed by that specific kernel part of the input function, e.g. how dense the
linear part of a kernel is compared to the non-linear part.
"""
if kern is None:
kern = self.kern
mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
Sigma = np.zeros(mumuT.shape)
if var_jac.ndim == 3:
Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
else:
Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = self.output_dim*var_jac
G = 0.
if mean:
G += mumuT
if covariance:
G += Sigma
return G
def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
"""
Predict the magnification factor as
sqrt(det(G))
for each point N in Xnew
"""
G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
from ..util.linalg import jitchol
mag = np.empty(Xnew.shape[0])
for n in range(Xnew.shape[0]):
try:
mag[n] = np.sqrt(np.exp(2*np.sum(np.log(np.diag(jitchol(G[n, :, :]))))))
except:
mag[n] = np.sqrt(np.linalg.det(G[n, :, :]))
return mag
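A minimal sketch of the new Wishart-embedding / magnification methods; the BayesianGPLVM setup is illustrative and assumes the kernel implements the new gradients_XX methods:

import numpy as np
import GPy

Y = np.random.randn(100, 5)
m = GPy.models.BayesianGPLVM(Y, input_dim=2)
m.optimize()

Xgrid = np.random.randn(20, 2)            # points in the latent space
G = m.predict_wishard_embedding(Xgrid)    # (20, 2, 2) metric tensors
mag = m.predict_magnification(Xgrid)      # sqrt(det(G)) per point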
def posterior_samples_f(self,X,size=10, full_cov=True):
"""
Samples the posterior GP at the points X.
@ -395,8 +536,8 @@ class GP(Model):
def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None):
plot_raw=False, linecol=None,fillcol=None, Y_metadata=None,
data_symbol='kx', predict_kw=None, plot_training_data=True, samples_y=0, apply_link=False):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -419,7 +560,7 @@ class GP(Model):
:param levels: for 2D plotting, the number of contour levels to use; if ax is None, a new figure is created
:type levels: int
:param samples: the number of a posteriori samples to plot
:param samples: the number of a posteriori samples to plot, p(f*|y)
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
@ -433,6 +574,12 @@ class GP(Model):
:type Y_metadata: dict
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
:param samples_y: the number of a posteriori samples to plot, p(y*|y)
:type samples_y: int
:param apply_link: if the likelihood has a link function, plot link(f*) rather than f* when plotting posterior samples of f
:type apply_link: boolean
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
@ -445,7 +592,103 @@ class GP(Model):
which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, predict_kw=predict_kw, **kw)
data_symbol=data_symbol, predict_kw=predict_kw,
plot_training_data=plot_training_data, samples_y=samples_y, apply_link=apply_link, **kw)
def plot_data(self, which_data_rows='all',
which_data_ycols='all', visible_dims=None,
fignum=None, ax=None, data_symbol='kx'):
"""
Plot the training data
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
Can plot only part of the data
using which_data_rows and which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param visible_dims: an array specifying the input dimensions to plot (maximum two)
:type visible_dims: a numpy array
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: for 2D plotting, the number of contour levels to use; if ax is None, a new figure is created
:type levels: int
:param samples: the number of a posteriori samples to plot, p(f*|y)
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
kw = {}
return models_plots.plot_data(self, which_data_rows,
which_data_ycols, visible_dims,
fignum, ax, data_symbol, **kw)
def errorbars_trainset(self, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], fignum=None, ax=None,
linecol=None, data_symbol='kx', predict_kw=None, plot_training_data=True,lw=None):
"""
Plot the posterior error bars corresponding to the training data
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
Can plot only part of the data
using which_data_rows and which_data_ycols.
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
kw = {}
if lw is not None:
kw['lw'] = lw
return models_plots.errorbars_trainset(self, which_data_rows, which_data_ycols, fixed_inputs,
fignum, ax, linecol, data_symbol,
predict_kw, plot_training_data, **kw)
def plot_magnification(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, legend=True,
plot_limits=None,
aspect='auto', updates=False, plot_inducing=True, kern=None, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_magnification(self, labels, which_indices,
resolution, ax, marker, s,
fignum, plot_inducing, legend,
plot_limits, aspect, updates, **kwargs)
def input_sensitivity(self, summarize=True):
"""

View file

@ -32,7 +32,7 @@ class Bijective_mapping(Mapping):
also back from f to X. The inverse mapping is called g().
"""
def __init__(self, input_dim, output_dim, name='bijective_mapping'):
super(Bijective_apping, self).__init__(name=name)
super(Bijective_mapping, self).__init__(name=name)
def g(self, f):
"""Inverse mapping from output domain of the function to the inputs."""

View file

@ -180,6 +180,7 @@ class Param(Parameterizable, ObsAr):
import copy
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s
def _setup_observers(self):
"""
Setup the default observers

View file

@ -197,9 +197,10 @@ class Parameterized(Parameterizable):
raise RuntimeError("{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param)))
start = sum([p.size for p in self.parameters[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size
del self.parameters[param._parent_index_]
self._remove_parameter_name(param)
param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers)
@ -315,7 +316,7 @@ class Parameterized(Parameterizable):
param[:] = val; return
except AttributeError:
pass
object.__setattr__(self, name, val);
return object.__setattr__(self, name, val);
#===========================================================================
# Pickling

View file

@ -366,6 +366,7 @@ class InverseGamma(Gamma):
def rvs(self, n):
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
class DGPLVM_KFDA(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable function using
@ -512,6 +513,7 @@ class DGPLVM_KFDA(Prior):
self.A = self.compute_A(lst_ni)
self.x_shape = x_shape
class DGPLVM(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
@ -669,7 +671,7 @@ class DGPLVM(Prior):
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
# sb_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
@ -903,7 +905,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
@ -927,7 +929,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
@ -1198,6 +1200,7 @@ class DGPLVM_T(Prior):
class HalfT(Prior):
"""
Implementation of the half student t probability function, coupled with random variables.
@ -1208,15 +1211,17 @@ class HalfT(Prior):
"""
domain = _POSITIVE
_instances = []
def __new__(cls, A, nu): # Singleton:
def __new__(cls, A, nu): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().A == A and instance().nu == nu:
return instance()
return instance()
o = super(Prior, cls).__new__(cls, A, nu)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, A, nu):
self.A = float(A)
self.nu = float(nu)
@ -1225,37 +1230,81 @@ class HalfT(Prior):
def __str__(self):
return "hT({:.2g}, {:.2g})".format(self.A, self.nu)
def lnpdf(self,theta):
return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) )
def lnpdf(self, theta):
return (theta > 0) * (self.constant - .5*(self.nu + 1) * np.log(1. + (1./self.nu) * (theta/self.A)**2))
#theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
#lnpdfs = np.zeros_like(theta)
#theta = np.array([theta])
#above_zero = theta.flatten()>1e-6
#v = self.nu
#sigma2=self.A
#stop
#lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
# - gammaln(v * 0.5)
# - 0.5*np.log(sigma2 * v * np.pi)
# - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
#)
#return lnpdfs
# theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
# lnpdfs = np.zeros_like(theta)
# theta = np.array([theta])
# above_zero = theta.flatten()>1e-6
# v = self.nu
# sigma2=self.A
# stop
# lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
# - gammaln(v * 0.5)
# - 0.5*np.log(sigma2 * v * np.pi)
# - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
# )
# return lnpdfs
def lnpdf_grad(self,theta):
theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
def lnpdf_grad(self, theta):
theta = theta if isinstance(theta, np.ndarray) else np.array([theta])
grad = np.zeros_like(theta)
above_zero = theta>1e-6
above_zero = theta > 1e-6
v = self.nu
sigma2=self.A
sigma2 = self.A
grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2)
return grad
def rvs(self, n):
#return np.random.randn(n) * self.sigma + self.mu
from scipy.stats import t
#[np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
ret = t.rvs(self.nu,loc=0,scale=self.A, size=n)
ret[ret<0] = 0
return ret
# return np.random.randn(n) * self.sigma + self.mu
from scipy.stats import t
# [np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
ret = t.rvs(self.nu, loc=0, scale=self.A, size=n)
ret[ret < 0] = 0
return ret
class Exponential(Prior):
"""
Implementation of the Exponential probability function,
coupled with random variables.
:param l: rate parameter
"""
domain = _POSITIVE
_instances = []
def __new__(cls, l): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().l == l:
return instance()
o = super(Exponential, cls).__new__(cls, l)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, l):
self.l = l
def __str__(self):
return "Exp({:.2g})".format(self.l)
def summary(self):
ret = {"E[x]": 1. / self.l,
"E[ln x]": np.nan,
"var[x]": 1. / self.l**2,
"Entropy": 1. - np.log(self.l),
"Mode": 0.}
return ret
def lnpdf(self, x):
return np.log(self.l) - self.l * x
def lnpdf_grad(self, x):
return - self.l
def rvs(self, n):
return np.random.exponential(scale=1./self.l, size=n)
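A hedged example of attaching the new prior to a parameter; the regression model is illustrative:

import numpy as np
import GPy

X = np.linspace(0, 1, 30)[:, None]
Y = np.sin(10 * X) + 0.05 * np.random.randn(30, 1)
m = GPy.models.GPRegression(X, Y)
# Exponential(rate=1) prior on the (positive) noise variance
m.likelihood.variance.set_prior(GPy.priors.Exponential(1.))
m.optimize()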

View file

@ -62,7 +62,7 @@ class Transformation(object):
import matplotlib.pyplot as plt
from ...plotting.matplot_dep import base_plots
x = np.linspace(-8,8)
base_plots.meanplot(x, self.f(x),axes=axes*args,**kw)
base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw)
axes = plt.gca()
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel)

View file

@ -49,7 +49,7 @@ class SparseGP(GP):
else:
#inference_method = ??
raise NotImplementedError("what to do what to do?")
print("defaulting to ", inference_method, "for latent function inference")
print(("defaulting to ", inference_method, "for latent function inference"))
self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0]
@ -60,6 +60,10 @@ class SparseGP(GP):
self.link_parameter(self.Z, index=0)
self.posterior = None
@property
def _predictive_variable(self):
return self.Z
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
@ -117,45 +121,48 @@ class SparseGP(GP):
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is off,
we take only the diagonal elements across N.
For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).
This holds both with and without missing data. For the missing-data SparseGP implementation see py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'.
For uncertain inputs, the SparseGP bound cannot predict the full covariance matrix (full_cov) for now.
The implementation of that will follow. However, for each dimension the
covariance changes, so if full_cov is False (standard), we return the variance
for each dimension [NxD].
"""
if kern is None: kern = self.kern
if not isinstance(Xnew, VariationalPosterior):
Kx = kern.K(self.Z, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = kern.K(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
# Kx = kern.K(self._predictive_variable, Xnew)
# mu = np.dot(Kx.T, self.posterior.woodbury_vector)
# if full_cov:
# Kxx = kern.K(Xnew)
# if self.posterior.woodbury_inv.ndim == 2:
# var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
# elif self.posterior.woodbury_inv.ndim == 3:
# var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
# for i in range(var.shape[2]):
# var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
# var = var
# else:
# Kxx = kern.Kdiag(Xnew)
# if self.posterior.woodbury_inv.ndim == 2:
# var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
# elif self.posterior.woodbury_inv.ndim == 3:
# var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
# for i in range(var.shape[1]):
# var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
# var = var
# #add in the mean function
# if self.mean_function is not None:
# mu += self.mean_function.f(Xnew)
mu, var = super(SparseGP, self)._raw_predict(Xnew, full_cov, kern)
else:
psi0_star = kern.psi0(self.Z, Xnew)
psi1_star = kern.psi1(self.Z, Xnew)
psi0_star = kern.psi0(self._predictive_variable, Xnew)
psi1_star = kern.psi1(self._predictive_variable, Xnew)
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
la = self.posterior.woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions?
if full_cov:
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
di = np.diag_indices(la.shape[1])
else:
@ -163,7 +170,7 @@ class SparseGP(GP):
for i in range(Xnew.shape[0]):
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
var_ = mdot(la.T, tmp, la)

View file

@ -34,7 +34,7 @@ class SparseGP_MPI(SparseGP):
"""
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False):
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp', Y_metadata=None, mpi_comm=None, normalizer=False):
self._IN_OPTIMIZATION_ = False
if mpi_comm != None:
if inference_method is None:

View file

@ -1,11 +1,11 @@
# Copyright (c) 2014, James Hensman, Alex Matthews
# Distributed under the terms of the GNU General public License, see LICENSE.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util import choleskies
from .sparse_gp import SparseGP
from .parameterization.param import Param
from ..inference.latent_function_inference import SVGP as svgp_inf
from ..inference.latent_function_inference.svgp import SVGP as svgp_inf
class SVGP(SparseGP):

View file

@ -24,7 +24,6 @@ class VerboseOptimization(object):
self.model.add_observer(self, self.print_status)
self.status = 'running'
self.clear = clear_after_finish
self.deltat = .2
self.update()
@ -80,6 +79,7 @@ class VerboseOptimization(object):
def __enter__(self):
self.start = time.time()
self._time = self.start
return self
def print_out(self, seconds):
@ -143,12 +143,12 @@ class VerboseOptimization(object):
def print_status(self, me, which=None):
self.update()
seconds = time.time()-self.start
t = time.time()
seconds = t-self.start
#sys.stdout.write(" "*len(self.message))
self.deltat += seconds
if self.deltat > .2:
if t-self._time > .3 or seconds < .3:
self.print_out(seconds)
self.deltat = 0
self._time = t
self.iteration += 1

View file

@ -3,7 +3,7 @@
import numpy as np
try:
import pylab as pb
from matplotlib import pyplot as pb
except:
pass
import GPy

View file

@ -77,7 +77,7 @@ def student_t_approx(optimize=True, plot=True):
debug=True
if debug:
m4.optimize(messages=1)
import pylab as pb
from matplotlib import pyplot as pb
pb.plot(m4.X, m4.inference_method.f_hat)
pb.plot(m4.X, m4.Y, 'rx')
m4.plot()

View file

@ -5,7 +5,7 @@
Gaussian Processes regression examples
"""
try:
import pylab as pb
from matplotlib import pyplot as pb
except:
pass
import numpy as np

View file

@ -69,7 +69,7 @@ from .expectation_propagation_dtc import EPDTC
from .dtc import DTC
from .fitc import FITC
from .var_dtc_parallel import VarDTC_minibatch
#from .svgp import SVGP
from .var_gauss import VarGauss
# class FullLatentFunctionData(object):
#

View file

@ -4,6 +4,8 @@
import numpy as np
from ...core import Model
from ...core.parameterization import variational
from ...util.linalg import tdot
from GPy.core.parameterization.variational import VariationalPosterior
def infer_newX(model, Y_new, optimize=True, init='L2'):
"""
@ -60,18 +62,19 @@ class InferenceX(Model):
# self.kern.GPU(True)
from copy import deepcopy
self.posterior = deepcopy(model.posterior)
if hasattr(model, 'variational_prior'):
from ...core.parameterization.variational import VariationalPosterior
if isinstance(model.X, VariationalPosterior):
self.uncertain_input = True
from ...models.ss_gplvm import IBPPrior
from ...models.ss_mrd import IBPPrior_SSMRD
if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
from ...core.parameterization.variational import SpikeAndSlabPrior
self.variational_prior = SpikeAndSlabPrior(pi=05,learnPi=False, group_spike=False)
self.variational_prior = SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
else:
self.variational_prior = model.variational_prior.copy()
else:
self.uncertain_input = False
if hasattr(model, 'inducing_inputs'):
if hasattr(model, 'Z'):
self.sparse_gp = True
self.Z = model.Z.copy()
else:
@ -125,13 +128,13 @@ class InferenceX(Model):
wv = wv[:,self.valid_dim]
output_dim = self.valid_dim.sum()
if self.ninan is not None:
self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - np.einsum('md,od->mo',wv, wv)[:, :, None]).sum(-1)
self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - tdot(wv)[:, :, None]).sum(-1)
else:
self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))
self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - tdot(wv))
self.dL_dpsi1 = beta*np.dot(self.Y[:,self.valid_dim], wv.T)
self.dL_dpsi0 = - beta/2.* np.ones(self.Y.shape[0])
else:
self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2.
self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - tdot(wv))/2. #np.einsum('md,od->mo',wv, wv)
self.dL_dpsi1 = beta*np.dot(self.Y, wv.T)
self.dL_dpsi0 = -beta/2.*output_dim* np.ones(self.Y.shape[0])

View file

@ -172,6 +172,7 @@ class Laplace(LatentFunctionInference):
def obj(Ki_f, f):
ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
if np.isnan(ll):
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return -np.inf
else:
return ll

View file

@ -64,9 +64,7 @@ class VarDTC(LatentFunctionInference):
def get_VVTfactor(self, Y, prec):
return Y * prec # TODO: cache this, and make it efficient
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
_, output_dim = Y.shape
uncertain_inputs = isinstance(X, VariationalPosterior)
@ -95,17 +93,28 @@ class VarDTC(LatentFunctionInference):
# The rather complex computations of A, and the psi stats
if uncertain_inputs:
psi0 = kern.psi0(Z, X)
psi1 = kern.psi1(Z, X)
if psi0 is None:
psi0 = kern.psi0(Z, X)
if psi1 is None:
psi1 = kern.psi1(Z, X)
if het_noise:
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
if psi2 is not None:
assert len(psi2.shape) == 3 # Need to have not summed out N
#FIXME: Need testing
psi2_beta = np.sum([psi2[i] * beta_i for i,beta_i in enumerate(beta)],0)
else:
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
else:
psi2_beta = kern.psi2(Z,X) * beta
if psi2 is None:
psi2 = kern.psi2(Z,X)
psi2_beta = psi2 * beta
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
if psi0 is None:
psi0 = kern.Kdiag(X)
if psi1 is None:
psi1 = kern.K(X, Z)
if het_noise:
tmp = psi1 * (np.sqrt(beta))
else:

View file

@ -172,17 +172,22 @@ class VarDTC_minibatch(LatentFunctionInference):
if not np.isfinite(Kmm).all():
print(Kmm)
Lm = jitchol(Kmm)
LmInv = dtrtri(Lm)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
LmInvPsi2LmInvT = LmInv.dot(psi2_full.dot(LmInv.T))
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda)
LLInv = dtrtri(LL)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
bbt = np.square(b).sum()
v = dtrtrs(Lm,dtrtrs(LL,b,trans=1)[0],trans=1)[0]
LmLLInv = LLInv.dot(LmInv)
tmp = -backsub_both_sides(LL, tdot(b)+output_dim*np.eye(input_dim), transpose='left')
dL_dpsi2R = backsub_both_sides(Lm, tmp+output_dim*np.eye(input_dim), transpose='left')/2.
b = psi1Y_full.dot(LmLLInv.T)
bbt = np.square(b).sum()
v = b.dot(LmLLInv).T
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.
# Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R
@ -201,7 +206,7 @@ class VarDTC_minibatch(LatentFunctionInference):
# Compute dL_dKmm
#======================================================================
dL_dKmm = dL_dpsi2R - output_dim*backsub_both_sides(Lm, LmInvPsi2LmInvT, transpose='left')/2.
dL_dKmm = dL_dpsi2R - output_dim*LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)

View file

@ -0,0 +1,69 @@
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv
from .posterior import Posterior
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class VarGauss(LatentFunctionInference):
"""
The Variational Gaussian Approximation revisited
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
author = {Opper, Manfred and Archambeau, C{\'e}dric},
journal = {Neural Comput.},
year = {2009},
pages = {786--792},
}
"""
def __init__(self, alpha, beta):
"""
:param alpha: GPy.core.Param variational parameter
:param beta: GPy.core.Param variational parameter
"""
self.alpha, self.beta = alpha, beta
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
if mean_function is not None:
raise NotImplementedError
num_data, output_dim = Y.shape
assert output_dim ==1, "Only one output supported"
K = kern.K(X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior covariance: need full matrix for gradients
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, m, var, Y_metadata=Y_metadata)
if dF_dthetaL is not None:
dL_dthetaL = dF_dthetaL.sum(1).sum(1)
else:
dL_dthetaL = np.array([])
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
#dF_db_ = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
dF_db = -2*np.sum(Sigma**2 * (dF_dv * self.beta), 0)
#assert np.allclose(dF_db, dF_db_)
KL = 0.5*(Alogdet + np.trace(Ai) - num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
log_marginal = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
return Posterior(mean=m, cov=Sigma ,K=K),\
log_marginal,\
{'dL_dK':dF_dK-dKL_dK, 'dL_dthetaL':dL_dthetaL}
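A sketch of the parameterisation this inference implements (Opper & Archambeau, 2009): with B = diag(\beta) and A = I + BKB as in the code,

q(f) = \mathcal{N}(m, \Sigma), \qquad m = K\alpha, \qquad \Sigma = (K^{-1} + B^2)^{-1} = B^{-2} - B^{-1} A^{-1} B^{-1},

and the returned log_marginal is the bound

\sum_i \mathbb{E}_{q}[\log p(y_i \mid f_i)] \; - \; \mathrm{KL}\big(q(f) \,\|\, \mathcal{N}(0, K)\big),

whose KL term expands in closed form to the 0.5*(Alogdet + trace(Ai) - num_data + sum(m*alpha)) line above.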

View file

@ -1 +1,2 @@
from .hmc import HMC
from .samplers import *

View file

@ -1,14 +1,10 @@
# ## Copyright (c) 2014, Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from __future__ import print_function
import numpy as np
from scipy import linalg, optimize
import Tango
import sys
import re
import numdifftools as ndt
import pdb
try:
#In Python 2, cPickle is faster. It does not exist in Python 3 but the underlying code is always used
@ -22,11 +18,11 @@ class Metropolis_Hastings:
def __init__(self,model,cov=None):
"""Metropolis Hastings, with tunings according to Gelman et al. """
self.model = model
current = self.model._get_params_transformed()
current = self.model.optimizer_array
self.D = current.size
self.chains = []
if cov is None:
self.cov = model.Laplace_covariance()
self.cov = np.eye(self.D)
else:
self.cov = cov
self.scale = 2.4/np.sqrt(self.D)
@ -37,20 +33,20 @@ class Metropolis_Hastings:
if start is None:
self.model.randomize()
else:
self.model._set_params_transformed(start)
self.model.optimizer_array = start
def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400):
current = self.model._get_params_transformed()
fcurrent = self.model.log_likelihood() + self.model.log_prior()
def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400):
current = self.model.optimizer_array
fcurrent = self.model.log_likelihood() + self.model.log_prior() + \
self.model._log_det_jacobian()
accepted = np.zeros(Ntotal,dtype=np.bool)
for it in range(Ntotal):
print("sample %d of %d\r"%(it,Ntotal), end=' ')
print("sample %d of %d\r"%(it,Ntotal),end="\t")
sys.stdout.flush()
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
self.model._set_params_transformed(prop)
fprop = self.model.log_likelihood() + self.model.log_prior()
self.model.optimizer_array = prop
fprop = self.model.log_likelihood() + self.model.log_prior() + \
self.model._log_det_jacobian()
if fprop>fcurrent:#sample accepted, going 'uphill'
accepted[it] = True
@ -78,10 +74,11 @@ class Metropolis_Hastings:
def predict(self,function,args):
"""Make a prediction for the function, to which we will pass the additional arguments"""
param = self.model._get_params()
param = self.model.param_array
fs = []
for p in self.chain:
self.model._set_params(p)
self.model.param_array = p
fs.append(function(*args))
self.model._set_params(param)# reset model to starting state
# reset model to starting state
self.model.param_array = param
return fs
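A usage sketch of the updated sampler; the model and the import path are assumptions for illustration:

import numpy as np
import GPy
from GPy.inference.mcmc import Metropolis_Hastings  # assumed import path (re-exported from .samplers)

X = np.random.rand(40, 1)
Y = np.sin(8 * X) + 0.1 * np.random.randn(40, 1)
m = GPy.models.GPRegression(X, Y)
m.optimize()  # start the chain near the mode

mh = Metropolis_Hastings(m)   # proposal covariance now defaults to the identity
mh.sample(Ntotal=2000, Nburn=500, Nthin=5)
# drawn (transformed) parameter samples are kept on the sampler, cf. predict() above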

View file

@ -38,16 +38,17 @@ class SparseGPMissing(StochasticStorage):
import numpy as np
self.Y = model.Y_normalized
bdict = {}
#For N > 1000, array2string's default threshold crops the output
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'})
inan = np.isnan(self.Y)[:, d]
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
class SparseGPStochastics(StochasticStorage):
@ -55,32 +56,36 @@ class SparseGPStochastics(StochasticStorage):
For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1):
def __init__(self, model, batchsize=1, missing_data=True):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.missing_data = missing_data
self.reset()
self.do_stochastics()
def do_stochastics(self):
import numpy as np
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
else:
import numpy as np
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = int(np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'}), 2)
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
self.d = bdict.values()
if self.missing_data:
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values()
else:
self.d = [[self.d, None]]
def reset(self):
self.current_dim = -1

View file

@ -6,6 +6,7 @@ from ._src.brownian import Brownian
from ._src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from ._src.mlp import MLP
from ._src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from ._src.standard_periodic import StdPeriodic
from ._src.independent_outputs import IndependentOutputs, Hierarchical
from ._src.coregionalize import Coregionalize
from ._src.ODE_UY import ODE_UY
@ -17,7 +18,7 @@ from ._src.eq_ode2 import EQ_ODE2
from ._src.trunclinear import TruncLinear,TruncLinear_inf
from ._src.splitKern import SplitKern,DEtime
from ._src.splitKern import DEtime as DiffGenomeKern
from ._src.spline import Spline
from ._src.eq_ode2 import EQ_ODE2
from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

View file

@ -14,7 +14,7 @@ class Add(CombinationKernel):
This kernel will take over the active dims of its subkernels passed in.
"""
def __init__(self, subkerns, name='add'):
def __init__(self, subkerns, name='sum'):
for i, kern in enumerate(subkerns[:]):
if isinstance(kern, Add):
del subkerns[i]
@ -72,15 +72,28 @@ class Add(CombinationKernel):
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
@Cache_this(limit=2, force_kwargs=['which_parts'])
def gradients_XX(self, dL_dK, X, X2):
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
else:
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
[target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_XX_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
return target
@Cache_this(limit=1, force_kwargs=['which_parts'])
def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
@Cache_this(limit=1, force_kwargs=['which_parts'])
def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
@Cache_this(limit=1, force_kwargs=['which_parts'])
def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
@ -115,6 +128,41 @@ class Add(CombinationKernel):
raise NotImplementedError("psi2 cannot be computed for this kernel")
return psi2
@Cache_this(limit=1, force_kwargs=['which_parts'])
def psi2n(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from .static import White, Bias
from .rbf import RBF
#from rbf_inv import RBFInv
from .linear import Linear
#from fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += np.einsum('nm,no->nmo',tmp1,tmp2)+np.einsum('nm,no->nmo',tmp2,tmp1)
#(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError("psi2 cannot be computed for this kernel")
return psi2
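The cross terms above follow from expanding the expectation for a sum kernel; as a sketch, for two parts k_1, k_2 on non-overlapping dimensions (as asserted),

\psi_2^{n,m,m'} = E_{q(X)}\big[(k_1{+}k_2)(Z_m, X_n)(k_1{+}k_2)(X_n, Z_{m'})\big] = \psi_2^{(1)} + \psi_2^{(2)} + \psi_{1,nm}^{(1)}\psi_{1,nm'}^{(2)} + \psi_{1,nm}^{(2)}\psi_{1,nm'}^{(1)},

which is the einsum('nm,no->nmo', ...) term; a Bias part has constant \psi_1 equal to its variance, giving the variance * (tmp[:, :, None] + tmp[:, None, :]) branches.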
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from .static import White, Bias
for p1 in self.parts:
@ -126,9 +174,9 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@ -143,9 +191,9 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
@ -161,9 +209,9 @@ class Add(CombinationKernel):
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
return target_grads

View file

@ -6,7 +6,11 @@ import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assesing whether to use cython
from . import coregionalize_cython
try:
from . import coregionalize_cython
config.set('cython', 'working', 'True')
except ImportError:
config.set('cython', 'working', 'False')
class Coregionalize(Kern):
"""
@ -94,7 +98,7 @@ class Coregionalize(Kern):
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small)
dkappa = np.diag(dL_dK_small).copy()
dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
@ -111,7 +115,7 @@ class Coregionalize(Kern):
return dL_dK_small
def _gradient_reduce_cython(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0]
index, index2 = np.int64(index[:,0]), np.int64(index2[:,0])
return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
@ -126,4 +130,3 @@ class Coregionalize(Kern):
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)

File diff suppressed because it is too large

View file

@ -1,33 +1,37 @@
#cython: boundscheck=True
#cython: wraparound=True
#cython: boundscheck=False
#cython: wraparound=False
#cython: nonecheck=False
import cython
import numpy as np
cimport numpy as np
def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
cdef int N = X.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
for n in range(N):
for m in range(N):
K[n,m] = B[X[n],X[m]]
cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, N))
with nogil:
for n in range(N):
for m in range(N):
K[n, m] = B[X[n], X[m]]
return K
def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
cdef int N = X.size
cdef int M = X2.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
for n in range(N):
for m in range(M):
K[n,m] = B[X[n],X2[m]]
cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, M))
with nogil:
for n in range(N):
for m in range(M):
K[n, m] = B[X[n], X2[m]]
return K
def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
cdef np.ndarray[np.double_t, ndim=2, mode='c'] dL_dK_small = np.zeros((D, D))
cdef int N = index.size
cdef int M = index2.size
for i in range(N):
for j in range(M):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
with nogil:
for i in range(N):
for j in range(M):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
return dL_dK_small
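For readers without the compiled extension, a rough NumPy equivalent of these helpers (a sketch; the Cython versions above are what the kernel actually uses):

import numpy as np

def K_symmetric_np(B, X):
    # K[n, m] = B[X[n], X[m]]
    return B[np.ix_(X, X)]

def K_asymmetric_np(B, X, X2):
    # K[n, m] = B[X[n], X2[m]]
    return B[np.ix_(X, X2)]

def gradient_reduce_np(D, dL_dK, index, index2):
    # dL_dK_small[index2[j], index[i]] += dL_dK[i, j], accumulating duplicates
    dL_dK_small = np.zeros((D, D))
    rows = np.broadcast_to(index2[None, :], dL_dK.shape)
    cols = np.broadcast_to(index[:, None], dL_dK.shape)
    np.add.at(dL_dK_small, (rows, cols), dL_dK)
    return dL_dK_small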

View file

@ -105,7 +105,7 @@ class IndependentOutputs(CombinationKernel):
if X2 is None:
# TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out
print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
# print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))

View file

@ -58,20 +58,9 @@ class Kern(Parameterized):
self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
@property
def return_psi2_n(self):
"""
Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
"""
return self._return_psi2_n_flag[0]
@return_psi2_n.setter
def return_psi2_n(self, val):
def visit(self):
if isinstance(self, Kern):
self._return_psi2_n_flag[0]=val
self.traverse(visit)
from .psi_comp import PSICOMP_GH
self.psicomp = PSICOMP_GH()
@Cache_this(limit=20)
def _slice_X(self, X):
@ -81,6 +70,9 @@ class Kern(Parameterized):
"""
Compute the kernel function.
.. math::
K_{ij} = k(X_i, X_j)
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
@ -88,16 +80,64 @@ class Kern(Parameterized):
"""
raise NotImplementedError
def Kdiag(self, X):
"""
The diagonal of the kernel matrix K
.. math::
Kdiag_{i} = k(X_i, X_i)
"""
raise NotImplementedError
def psi0(self, Z, variational_posterior):
raise NotImplementedError
"""
.. math::
\psi_0 = \sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
raise NotImplementedError
"""
.. math::
\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
raise NotImplementedError
"""
.. math::
\psi_2^{m,m'} = \sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
def psi2n(self, Z, variational_posterior):
"""
.. math::
\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
Thus, we do not sum out n, compared to psi2
"""
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
def gradients_X(self, dL_dK, X, X2):
"""
.. math::
\\frac{\partial L}{\partial X} = \\frac{\partial L}{\partial K}\\frac{\partial K}{\partial X}
"""
raise NotImplementedError
def gradients_X_X2(self, dL_dK, X, X2):
return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
def gradients_XX(self, dL_dK, X, X2):
"""
.. math::
\\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
"""
raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X):
"""
The diagonal of the second derivative w.r.t. X and X2
"""
raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_X_diag(self, dL_dKdiag, X):
"""
The diagonal of the derivative w.r.t. X
"""
raise NotImplementedError
def update_gradients_diag(self, dL_dKdiag, X):
@ -113,27 +153,35 @@ class Kern(Parameterized):
Set the gradients of all parameters when doing inference with
uncertain inputs, using expectations of the kernel.
The esential maths is
The essential maths is
dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i}
.. math::
\\frac{\partial L}{\partial \\theta_i} & = \\frac{\partial L}{\partial \psi_0}\\frac{\partial \psi_0}{\partial \\theta_i}\\
& \quad + \\frac{\partial L}{\partial \psi_1}\\frac{\partial \psi_1}{\partial \\theta_i}\\
& \quad + \\frac{\partial L}{\partial \psi_2}\\frac{\partial \psi_2}{\partial \\theta_i}
Thus, we push the different derivatives through the gradients of the psi
statistics. Be sure to set the gradients for all kernel
parameters here.
"""
raise NotImplementedError
dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
self.gradient[:] = dtheta
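Not part of this commit — a toy check of the chain rule stated in the docstring above, using made-up psi statistics that depend on a single scalar parameter `theta`, comparing the chained gradient against central finite differences:
```
import numpy as np

def psi_stats(theta):
    # stand-in psi statistics; any smooth functions of theta will do here
    return np.array([theta, 2.0 * theta]), theta ** 2 * np.ones((2, 3)), np.exp(theta) * np.eye(3)

rng = np.random.RandomState(1)
dL_dpsi0, dL_dpsi1, dL_dpsi2 = rng.randn(2), rng.randn(2, 3), rng.randn(3, 3)

def L(theta):
    p0, p1, p2 = psi_stats(theta)
    return (dL_dpsi0 * p0).sum() + (dL_dpsi1 * p1).sum() + (dL_dpsi2 * p2).sum()

theta, eps = 0.7, 1e-6
# analytic derivatives of the stand-in psi statistics w.r.t. theta
dpsi0, dpsi1, dpsi2 = np.array([1.0, 2.0]), 2 * theta * np.ones((2, 3)), np.exp(theta) * np.eye(3)
chained = (dL_dpsi0 * dpsi0).sum() + (dL_dpsi1 * dpsi1).sum() + (dL_dpsi2 * dpsi2).sum()
numeric = (L(theta + eps) - L(theta - eps)) / (2 * eps)
assert np.isclose(chained, numeric, atol=1e-4)
```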
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None):
"""
Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables.
"""
raise NotImplementedError
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Compute the gradients wrt the parameters of the variational
distruibution q(X), chain-ruling via the expectations of the kernel
"""
raise NotImplementedError
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
"""
@ -172,7 +220,7 @@ class Kern(Parameterized):
def __iadd__(self, other):
return self.add(other)
def add(self, other, name='add'):
def add(self, other, name='sum'):
"""
Add another kernel to this one.
@ -208,8 +256,6 @@ class Kern(Parameterized):
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."

View file

@ -1,7 +1,11 @@
'''
Created on 11 Mar 2014
@author: maxz
@author: @mzwiessele
This module provides a meta class for the kernels. The meta class is for
slicing the inputs (X, X2) for the kernels, before K (or any other method involving X)
gets called. The `active_dims` of a kernel decide which dimensions the kernel works on.
'''
from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np
@ -19,20 +23,27 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'psi0', _slice_psi)
put_clean(dct, 'psi1', _slice_psi)
put_clean(dct, 'psi2', _slice_psi)
put_clean(dct, 'psi2n', _slice_psi)
put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
class _Slice_wrap(object):
def __init__(self, k, X, X2=None):
def __init__(self, k, X, X2=None, ret_shape=None):
self.k = k
self.shape = X.shape
if ret_shape is None:
self.shape = X.shape
else:
self.shape = ret_shape
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
if X2 is not None:
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
@ -54,7 +65,10 @@ class _Slice_wrap(object):
def handle_return_array(self, return_val):
if self.ret:
ret = np.zeros(self.shape)
ret[:, self.k.active_dims] = return_val
if len(self.shape) == 2:
ret[:, self.k.active_dims] = return_val
elif len(self.shape) == 3:
ret[:, :, self.k.active_dims] = return_val
return ret
return return_val
@ -98,6 +112,19 @@ def _slice_gradients_X(f):
return ret
return wrap
def _slice_gradients_XX(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
@ -124,7 +151,8 @@ def _slice_update_gradients_expectations(f):
def _slice_gradients_Z_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
return ret
@ -132,7 +160,8 @@ def _slice_gradients_Z_expectations(f):
def _slice_gradients_qX_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
with _Slice_wrap(self, variational_posterior, Z) as s:
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
r2 = ret[:2]

View file

@ -17,7 +17,7 @@ class Linear(Kern):
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i
k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int
@ -100,6 +100,12 @@ class Linear(Kern):
#return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
def gradients_XX(self, dL_dK, X, X2=None):
if X2 is None:
return 2*np.ones(X.shape)*self.variances
else:
return np.ones(X.shape)*self.variances
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
@ -111,26 +117,29 @@ class Linear(Kern):
#---------------------------------------#
def psi0(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[0]
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[1]
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[2]
return self.psicomp.psicomputations(self, Z, variational_posterior)[2]
def psi2n(self, Z, variational_posterior):
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[0]
dL_dvar = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
if self.ARD:
self.variances.gradient = dL_dvar
else:
self.variances.gradient = dL_dvar.sum()
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[1]
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[2:]
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
class LinearFull(Kern):
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):

View file

@ -5,6 +5,8 @@ from .kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from ...util.linalg import tdot
from ...util.caching import Cache_this
four_over_tau = 2./np.pi
class MLP(Kern):
@ -31,105 +33,116 @@ class MLP(Kern):
"""
def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., active_dims=None, name='mlp'):
def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=1., ARD=False, active_dims=None, name='mlp'):
super(MLP, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.ARD= ARD
if ARD:
wv = np.empty((input_dim,))
wv[:] = weight_variance
weight_variance = wv
self.weight_variance = Param('weight_variance', weight_variance, Logexp())
self.bias_variance = Param('bias_variance', bias_variance, Logexp())
self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
@Cache_this(limit=20, ignore_args=())
def K(self, X, X2=None):
self._K_computations(X, X2)
return self.variance*self._K_dvar
if X2 is None:
X_denom = np.sqrt(self._comp_prod(X)+1.)
X2_denom = X_denom
X2 = X
else:
X_denom = np.sqrt(self._comp_prod(X)+1.)
X2_denom = np.sqrt(self._comp_prod(X2)+1.)
XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:]
return self.variance*four_over_tau*np.arcsin(XTX)
@Cache_this(limit=20, ignore_args=())
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X)
return self.variance*self._K_diag_dvar
X_prod = self._comp_prod(X)
return self.variance*four_over_tau*np.arcsin(X_prod/(X_prod+1.))
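Not part of this commit — a plain-numpy restatement of the arcsine covariance above (made-up values for variance, weight_variance and bias_variance), checking that the diagonal of K matches Kdiag:
```
import numpy as np

sigma2, w, b = 1.3, 0.7, 0.2   # variance, weight_variance, bias_variance (illustrative)

def mlp_K(X, X2):
    prod = lambda A, B: w * A.dot(B.T) + b
    d1 = np.sqrt(np.diag(prod(X, X)) + 1.0)
    d2 = np.sqrt(np.diag(prod(X2, X2)) + 1.0)
    return sigma2 * (2.0 / np.pi) * np.arcsin(prod(X, X2) / d1[:, None] / d2[None, :])

X = np.random.RandomState(2).randn(5, 3)
X_prod = w * (X * X).sum(1) + b
Kdiag = sigma2 * (2.0 / np.pi) * np.arcsin(X_prod / (X_prod + 1.0))
assert np.allclose(np.diag(mlp_K(X, X)), Kdiag)
```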
def update_gradients_full(self, dL_dK, X, X2=None):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
self.variance.gradient = np.sum(self._K_dvar*dL_dK)
denom3 = self._K_denom**3
base = four_over_tau*self.variance/np.sqrt(1-self._K_asin_arg*self._K_asin_arg)
base_cov_grad = base*dL_dK
if X2 is None:
vec = np.diag(self._K_inner_prod)
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec+self.bias_variance+1.), vec)
+np.outer(vec,(self.weight_variance*vec+self.bias_variance+1.))))*base_cov_grad).sum()
self.bias_variance.gradient = ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec[None, :]+vec[:, None])*self.weight_variance
+2.*self.bias_variance + 2.))*base_cov_grad).sum()
else:
vec1 = (X*X).sum(1)
vec2 = (X2*X2).sum(1)
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
self.bias_variance.gradient = ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec1[:, None]+vec2[None, :])*self.weight_variance
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
dvar, dw, db = self._comp_grads(dL_dK, X, X2)[:3]
self.variance.gradient = dvar
self.weight_variance.gradient = dw
self.bias_variance.gradient = db
def update_gradients_diag(self, dL_dKdiag, X):
self._K_diag_computations(X)
self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)
base = four_over_tau*self.variance/np.sqrt(1-self._K_diag_asin_arg*self._K_diag_asin_arg)
base_cov_grad = base*dL_dKdiag/np.square(self._K_diag_denom)
self.weight_variance.gradient = (base_cov_grad*np.square(X).sum(axis=1)).sum()
self.bias_variance.gradient = base_cov_grad.sum()
dvar, dw, db = self._comp_grads_diag(dL_dKdiag, X)[:3]
self.variance.gradient = dvar
self.weight_variance.gradient = dw
self.bias_variance.gradient = db
def gradients_X(self, dL_dK, X, X2):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_asin_arg
numer = self._K_numer
denom = self._K_denom
denom3 = denom*denom*denom
if X2 is not None:
vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
return four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
else:
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
return self._comp_grads(dL_dK, X, X2)[3]
def gradients_X_X2(self, dL_dK, X, X2):
"""Derivative of the covariance matrix with respect to X"""
return self._comp_grads(dL_dK, X, X2)[3:]
def gradients_X_diag(self, dL_dKdiag, X):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_asin_arg
denom = self._K_diag_denom
#numer = self._K_diag_numer
return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
return self._comp_grads_diag(dL_dKdiag, X)[3]
def _K_computations(self, X, X2):
"""Pre-computations for the covariance matrix (used for computing the covariance and its gradients."""
@Cache_this(limit=50, ignore_args=())
def _comp_prod(self, X, X2=None):
if X2 is None:
self._K_inner_prod = np.dot(X,X.T)
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec = np.diag(self._K_numer) + 1.
self._K_denom = np.sqrt(np.outer(vec,vec))
return (np.square(X)*self.weight_variance).sum(axis=1)+self.bias_variance
else:
self._K_inner_prod = np.dot(X,X2.T)
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
self._K_denom = np.sqrt(np.outer(vec1,vec2))
self._K_asin_arg = self._K_numer/self._K_denom
self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
return (X*self.weight_variance).dot(X2.T)+self.bias_variance
def _K_diag_computations(self, X):
"""Pre-computations concerning the diagonal terms (used for computation of diagonal and its gradients)."""
self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_denom = self._K_diag_numer+1.
self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)
@Cache_this(limit=20, ignore_args=(1,))
def _comp_grads(self, dL_dK, X, X2=None):
var,w,b = self.variance, self.weight_variance, self.bias_variance
K = self.K(X, X2)
dvar = (dL_dK*K).sum()/var
X_prod = self._comp_prod(X)
X2_prod = self._comp_prod(X2) if X2 is not None else X_prod
XTX = self._comp_prod(X,X2) if X2 is not None else self._comp_prod(X, X)
common = var*four_over_tau/np.sqrt((X_prod[:,None]+1.)*(X2_prod[None,:]+1.)-np.square(XTX))*dL_dK
if self.ARD:
if X2 is not None:
XX2 = X[:,None,:]*X2[None,:,:] if X2 is not None else X[:,None,:]*X[None,:,:]
XX = np.square(X)
X2X2 = np.square(X2)
Q = self.weight_variance.shape[0]
common_XTX = common*XTX
dw = np.dot(common.flat,XX2.reshape(-1,Q)) -( (common_XTX.sum(1)/(X_prod+1.)).T.dot(XX)+(common_XTX.sum(0)/(X2_prod+1.)).dot(X2X2))/2
else:
XX2 = X[:,None,:]*X[None,:,:]
XX = np.square(X)
Q = self.weight_variance.shape[0]
common_XTX = common*XTX
dw = np.dot(common.flat,XX2.reshape(-1,Q)) - ((common_XTX.sum(0)+common_XTX.sum(1))/(X_prod+1.)).dot(XX)/2
else:
dw = (common*((XTX-b)/w-XTX*(((X_prod-b)/(w*(X_prod+1.)))[:,None]+((X2_prod-b)/(w*(X2_prod+1.)))[None,:])/2.)).sum()
db = (common*(1.-XTX*(1./(X_prod[:,None]+1.)+1./(X2_prod[None,:]+1.))/2.)).sum()
if X2 is None:
common = common+common.T
dX = common.dot(X)*w-((common*XTX).sum(axis=1)/(X_prod+1.))[:,None]*X*w
dX2 = dX
else:
dX = common.dot(X2)*w-((common*XTX).sum(axis=1)/(X_prod+1.))[:,None]*X*w
dX2 = common.T.dot(X)*w-((common*XTX).sum(axis=0)/(X2_prod+1.))[:,None]*X2*w
return dvar, dw, db, dX, dX2
@Cache_this(limit=20, ignore_args=(1,))
def _comp_grads_diag(self, dL_dKdiag, X):
var,w,b = self.variance, self.weight_variance, self.bias_variance
K = self.Kdiag(X)
dvar = (dL_dKdiag*K).sum()/var
X_prod = self._comp_prod(X)
common = var*four_over_tau/(np.sqrt(1-np.square(X_prod/(X_prod+1)))*np.square(X_prod+1))*dL_dKdiag
if self.ARD:
XX = np.square(X)
dw = np.dot(common,XX)
else:
dw = (common*(X_prod-b)).sum()/w
db = common.sum()
dX = common[:,None]*X*w*2
return dvar, dw, db, dX

View file

@ -27,8 +27,6 @@ class Prod(CombinationKernel):
:param k1, k2: the kernels to multiply
:type k1, k2: Kern
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object
"""

View file

@ -9,18 +9,34 @@ from . import ssrbf_psi_comp
from . import sslinear_psi_comp
from . import linear_psi_comp
class PSICOMP_RBF(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
class PSICOMP(Pickleable):
def psicomputations(self, kern, Z, qX, return_psi2_n=False):
raise NotImplementedError("Abstract method!")
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
raise NotImplementedError("Abstract method!")
def _setup_observers(self):
pass
from .gaussherm import PSICOMP_GH
class PSICOMP_RBF(PSICOMP):
@Cache_this(limit=5, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
variance, lengthscale = kern.variance, kern.lengthscale
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=return_psi2_n)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else:
raise ValueError("unknown distriubtion received for psi-statistics")
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
@Cache_this(limit=5, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
@ -28,28 +44,26 @@ class PSICOMP_RBF(Pickleable):
else:
raise ValueError("unknown distriubtion received for psi-statistics")
def _setup_observers(self):
pass
class PSICOMP_Linear(PSICOMP):
class PSICOMP_Linear(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, Z, variational_posterior):
@Cache_this(limit=5, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
variances = kern.variances
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psicomputations(variance, Z, variational_posterior)
return linear_psi_comp.psicomputations(variances, Z, variational_posterior, return_psi2_n=return_psi2_n)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
return sslinear_psi_comp.psicomputations(variances, Z, variational_posterior)
else:
raise ValueError("unknown distriubtion received for psi-statistics")
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
@Cache_this(limit=2, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variances = kern.variances
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variances, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variances, Z, variational_posterior)
else:
raise ValueError("unknown distriubtion received for psi-statistics")
def _setup_observers(self):
pass

View file

@ -0,0 +1,100 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
An approximated psi-statistics implementation based on Gauss-Hermite Quadrature
"""
import numpy as np
from ....core.parameterization import Param
from GPy.util.caching import Cache_this
from ....util.linalg import tdot
from . import PSICOMP
class PSICOMP_GH(PSICOMP):
"""
TODO: support Psi2 with shape NxMxM
"""
def __init__(self, degree=5, cache_K=True):
self.degree = degree
self.cache_K = cache_K
self.locs, self.weights = np.polynomial.hermite.hermgauss(degree)
self.locs *= np.sqrt(2.)
self.weights*= 1./np.sqrt(np.pi)
self.Xs = None
def _setup_observers(self):
pass
@Cache_this(limit=10, ignore_args=(0,))
def comp_K(self, Z, qX):
if self.Xs is None or self.Xs.shape != qX.mean.shape:
from ....core.parameterization import ObsAr
self.Xs = ObsAr(np.empty((self.degree,)+qX.mean.shape))
mu, S = qX.mean.values, qX.variance.values
S_sq = np.sqrt(S)
for i in xrange(self.degree):
self.Xs[i] = self.locs[i]*S_sq+mu
return self.Xs
@Cache_this(limit=10, ignore_args=(0,))
def psicomputations(self, kern, Z, qX, return_psi2_n=False):
mu, S = qX.mean.values, qX.variance.values
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
if self.cache_K: Xs = self.comp_K(Z, qX)
else: S_sq = np.sqrt(S)
psi0 = np.zeros((N,))
psi1 = np.zeros((N,M))
psi2 = np.zeros((M,M))
for i in xrange(self.degree):
if self.cache_K:
X = Xs[i]
else:
X = self.locs[i]*S_sq+mu
psi0 += self.weights[i]* kern.Kdiag(X)
Kfu = kern.K(X,Z)
psi1 += self.weights[i]* Kfu
psi2 += self.weights[i]* tdot(Kfu.T)
return psi0, psi1, psi2
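Not part of this commit — a one-dimensional illustration of the Gauss-Hermite scaling used above (locs*sqrt(2), weights/sqrt(pi)), which recovers the Gaussian expectation E[x^2] = m^2 + s exactly for a degree-5 rule:
```
import numpy as np

degree = 5
locs, weights = np.polynomial.hermite.hermgauss(degree)
locs, weights = locs * np.sqrt(2.0), weights / np.sqrt(np.pi)

m, s = 0.3, 0.7                # mean and variance of q(x)
f = lambda x: x ** 2           # integrand; E_{N(m,s)}[x^2] = m^2 + s exactly
approx = sum(wi * f(li * np.sqrt(s) + m) for li, wi in zip(locs, weights))
assert np.isclose(approx, m ** 2 + s)
```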
@Cache_this(limit=10, ignore_args=(0, 2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
mu, S = qX.mean.values, qX.variance.values
if self.cache_K: Xs = self.comp_K(Z, qX)
S_sq = np.sqrt(S)
dtheta_old = kern.gradient.copy()
dtheta = np.zeros_like(kern.gradient)
if isinstance(Z, Param):
dZ = np.zeros_like(Z.values)
else:
dZ = np.zeros_like(Z)
dmu = np.zeros_like(mu)
dS = np.zeros_like(S)
for i in xrange(self.degree):
if self.cache_K:
X = Xs[i]
else:
X = self.locs[i]*S_sq+mu
dL_dpsi0_i = dL_dpsi0*self.weights[i]
kern.update_gradients_diag(dL_dpsi0_i, X)
dtheta += kern.gradient
dX = kern.gradients_X_diag(dL_dpsi0_i, X)
Kfu = kern.K(X,Z)
dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i]
kern.update_gradients_full(dL_dkfu, X, Z)
dtheta += kern.gradient
dX_i, dZ_i = kern.gradients_X_X2(dL_dkfu, X, Z)
dX += dX_i
dZ += dZ_i
dmu += dX
dS += dX*self.locs[i]/(2.*S_sq)
kern.gradient[:] = dtheta_old
return dtheta, dZ, dmu, dS

View file

@ -8,7 +8,7 @@ The package for the Psi statistics computation of the linear kernel for Bayesian
import numpy as np
from ....util.linalg import tdot
def psicomputations(variance, Z, variational_posterior):
def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
"""
Compute psi-statistics for ss-linear kernel
"""
@ -21,8 +21,12 @@ def psicomputations(variance, Z, variational_posterior):
S = variational_posterior.variance
psi0 = (variance*(np.square(mu)+S)).sum(axis=1)
psi1 = np.dot(mu,(variance*Z).T)
psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T)
Zv = variance * Z
psi1 = np.dot(mu,Zv.T)
if return_psi2_n:
psi2 = psi1[:,:,None] * psi1[:,None,:] + np.dot(S[:,None,:] * Zv[None,:,:], Zv.T)
else:
psi2 = np.dot(S.sum(axis=0) * Zv, Zv.T) + tdot(psi1.T)
return psi0, psi1, psi2
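Not part of this commit — a Monte Carlo check of the per-point psi2 identity used in the NxMxM branch above for the linear kernel (made-up sizes, loose tolerance for sampling error):
```
import numpy as np

rng = np.random.RandomState(5)
Q, M, n = 3, 4, 200000
variances = 0.5 + rng.rand(Q)             # per-dimension linear-kernel variances
mu, S = rng.randn(Q), 0.1 + rng.rand(Q)   # q(x) for a single data point
Zv = variances * rng.randn(M, Q)          # the Zv = variance * Z term from above

x = mu + np.sqrt(S) * rng.randn(n, Q)     # samples from q(x)
Kzx = x.dot(Zv.T)                         # k(x, Z_m) = x . Zv_m for every sample
mc = (Kzx[:, :, None] * Kzx[:, None, :]).mean(0)
closed = np.outer(Zv.dot(mu), Zv.dot(mu)) + (S * Zv).dot(Zv.T)
assert np.allclose(mc, closed, atol=0.1)  # loose tolerance for Monte Carlo error
```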
@ -59,19 +63,39 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S):
variance2 = np.square(variance)
common_sum = np.dot(mu,(variance*Z).T)
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
Z2_expect = np.inner(common_sum,dL_dpsi2T)
Z1_expect = np.dot(dL_dpsi2T,Z)
if len(dL_dpsi2.shape)==2:
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
Z2_expect = np.inner(common_sum,dL_dpsi2T)
Z1_expect = np.dot(dL_dpsi2T,Z)
dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
dL_dmu = common_expect*variance
dL_dmu = common_expect*variance
dL_dS = np.empty(S.shape)
dL_dS[:] = Z_expect*variance2
dL_dS = np.empty(S.shape)
dL_dS[:] = Z_expect*variance2
dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)
dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)
else:
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
dL_dpsi2_ = dL_dpsi2.sum(axis=0)
Z_expect = (np.dot(dL_dpsi2.reshape(N*M,M),Z).reshape(N,M,Q)*Z[None,:,:]).sum(axis=1)
dL_dpsi2T = dL_dpsi2_+dL_dpsi2_.T
dL_dpsi2T_ = dL_dpsi2+np.swapaxes(dL_dpsi2, 1, 2)
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
common_expect_ = (common_sum[:,:,None]*np.dot(dL_dpsi2T_.reshape(N*M,M),Z).reshape(N,M,Q)).sum(axis=1)
Z2_expect = (common_sum[:,:,None]*dL_dpsi2T_).sum(axis=1)
Z1_expect = np.dot(dL_dpsi2T_.reshape(N*M,M),Z).reshape(N,M,Q)
dL_dvar = 2.*variance*(S*Z_expect).sum(axis=0)+(common_expect_*mu).sum(axis=0)
dL_dmu = common_expect_*variance
dL_dS = np.empty(S.shape)
dL_dS[:] = variance2* Z_expect
dL_dZ = variance2*(S[:,None,:]*Z1_expect).sum(axis=0)+np.dot(Z2_expect.T,variance*mu)
return dL_dvar, dL_dmu, dL_dS, dL_dZ

View file

@ -5,13 +5,7 @@ The module for psi-statistics for RBF kernel
import numpy as np
from GPy.util.caching import Cacher
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
def psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=False):
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
@ -21,16 +15,11 @@ def psicomputations(variance, lengthscale, Z, variational_posterior):
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S)
if not return_psi2_n: psi2 = psi2.sum(axis=0)
return psi0, psi1, psi2
def __psi1computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
@ -45,26 +34,19 @@ def __psi1computations(variance, lengthscale, Z, mu, S):
return _psi1
def __psi2computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
N,M,Q = mu.shape[0], Z.shape[0], mu.shape[1]
lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+(2*(mu*denom).dot(Z_hat.reshape(M*M,Q).T) - denom.dot(np.square(Z_hat).reshape(M*M,Q).T)).reshape(N,M,M)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
@ -86,13 +68,6 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
@ -118,13 +93,6 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
@ -157,5 +125,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1)
_psi1computations = Cacher(__psi1computations, limit=5)
_psi2computations = Cacher(__psi2computations, limit=5)

View file

@ -7,13 +7,6 @@ from ....util.caching import Cache_this
from . import PSICOMP_RBF
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """
// define THREADNUM
@ -241,7 +234,11 @@ gpu_code = """
class PSICOMP_RBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
def __init__(self, threadnum=256, blocknum=30, GPU_direct=False):
from pycuda.compiler import SourceModule
from ....util.gpu_init import initGPU
initGPU()
self.GPU_direct = GPU_direct
self.gpuCache = None
@ -265,6 +262,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
return s
def _initGPUCache(self, N, M, Q):
import pycuda.gpuarray as gpuarray
if self.gpuCache == None:
self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
@ -320,13 +318,14 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
@Cache_this(limit=5, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
"""
Z - MxQ
mu - NxQ
S - NxQ
"""
variance, lengthscale = kern.variance, kern.lengthscale
N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
@ -355,8 +354,10 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
else:
return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
@Cache_this(limit=5, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale
from ....util.linalg_gpu import sum_axis
ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior)

View file

@ -9,7 +9,7 @@ from ....util.linalg import tdot
import numpy as np
def psicomputations(variance, Z, variational_posterior):
def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
"""
Compute psi-statistics for ss-linear kernel
"""

View file

@ -6,14 +6,7 @@ The module for psi-statistics for RBF kernel for Spike-and-Slab GPLVM
import numpy as np
from ....util.caching import Cache_this
from . import PSICOMP_RBF
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """
// define THREADNUM
@ -292,6 +285,11 @@ gpu_code = """
class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
from pycuda.compiler import SourceModule
from ....util.gpu_init import initGPU
initGPU()
self.GPU_direct = GPU_direct
self.gpuCache = None
@ -315,6 +313,7 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
return s
def _initGPUCache(self, N, M, Q):
import pycuda.gpuarray as gpuarray
if self.gpuCache == None:
self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
@ -377,12 +376,13 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
"""
Z - MxQ
mu - NxQ
S - NxQ
"""
variance, lengthscale = kern.variance, kern.lengthscale
N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
@ -409,8 +409,10 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
else:
return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
@Cache_this(limit=1, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale
from ....util.linalg_gpu import sum_axis
ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior)

View file

@ -31,6 +31,9 @@ class RBF(Stationary):
def dK_dr(self, r):
return -r*self.K_of_r(r)
def dK2_drdr(self, r):
return (r**2-1)*self.K_of_r(r)
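Not part of this commit — a quick finite-difference confirmation that d^2K/dr^2 = (r^2 - 1) K(r) for the RBF form K(r) = sigma^2 exp(-r^2/2):
```
import numpy as np

sigma2, r, eps = 2.0, 0.9, 1e-4
K = lambda r: sigma2 * np.exp(-0.5 * r ** 2)
d2_numeric = (K(r + eps) - 2 * K(r) + K(r - eps)) / eps ** 2
assert np.isclose(d2_numeric, (r ** 2 - 1) * K(r), atol=1e-5)
```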
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:
@ -50,22 +53,25 @@ class RBF(Stationary):
#---------------------------------------#
def psi0(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
def psi2n(self, Z, variational_posterior):
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[:2]
self.variance.gradient = dL_dvar
self.lengthscale.gradient = dL_dlengscale
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:]

GPy/kern/_src/spline.py Normal file
View file

@ -0,0 +1,52 @@
# Copyright (c) 2015, Thomas Hornung
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Spline(Kern):
"""
Linear spline kernel. You need to specify 2 parameters: the variance and c.
The variance is defined in powers of 10. Thus specifying -2 means 10^-2.
The parameter c controls the stiffness of the spline fit. A very stiff
spline is equivalent to linear regression.
See https://www.youtube.com/watch?v=50Vgw11qn0o starting at minute 1:17:28
Lit: Wahba, 1990
"""
def __init__(self, input_dim, variance=1., c=1., active_dims=None, name='spline'):
super(Spline, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.c = Param('c', c)
self.link_parameters(self.variance,self.c)
def K(self, X, X2=None):
if X2 is None: X2=X
term1 = (X+8.)*(X2.T+8.)/16.
term2 = abs((X-X2.T)/16.)**3
term3 = ((X+8.)/16.)**3 + ((X2.T+8.)/16.)**3
return (self.variance**2 * (1. + (1.+self.c) * term1 + self.c/3. * (term2 - term3)))
def Kdiag(self, X):
term1 = np.square(X+8.)/16.
term3 = 2. * ((X+8.)/16.)**3
return (self.variance**2 * (1. + (1.+self.c) * term1 - self.c/3. * term3))[:,0]
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: X2=X
term1 = (X+8.)*(X2.T+8.)/16.
term2 = abs((X-X2.T)/16.)**3
term3 = ((X+8.)/16.)**3 + ((X2.T+8.)/16.)**3
self.variance.gradient = np.sum(dL_dK * (2*self.variance * (1. + (1.+self.c) * term1 + self.c/3. * ( term2 - term3))))
self.c.gradient = np.sum(dL_dK * (self.variance**2* (term1 + 1./3.*(term2 - term3))))
def update_gradients_diag(self, dL_dKdiag, X):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError
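Not part of this commit — the same spline covariance written as a free-standing numpy function (illustrative values for variance and c), with a basic symmetry check on a 1-D grid:
```
import numpy as np

def spline_K(X, X2, variance=1.0, c=1.0):
    term1 = (X + 8.0) * (X2.T + 8.0) / 16.0
    term2 = np.abs((X - X2.T) / 16.0) ** 3
    term3 = ((X + 8.0) / 16.0) ** 3 + ((X2.T + 8.0) / 16.0) ** 3
    return variance ** 2 * (1.0 + (1.0 + c) * term1 + c / 3.0 * (term2 - term3))

X = np.linspace(-8.0, 8.0, 5)[:, None]
K = spline_K(X, X)
assert np.allclose(K, K.T)     # a covariance matrix must at least be symmetric
```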

View file

@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The standard periodic kernel, as mentioned in:
[1] Gaussian Processes for Machine Learning, C. E. Rasmussen, C. K. I. Williams.
The MIT Press, 2005.
[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
Neural Networks and Machine Learning, pages 133-165. Springer, 1998.
"""
from .kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
class StdPeriodic(Kern):
"""
Standard periodic kernel
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} \sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i))}{l_i} \right)^2 \right]
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\theta_1` in the formula above
:type variance: float
:param wavelength: the vector of wavelengths :math:`\lambda_i`. If None then 1.0 is assumed.
:type wavelength: array or list of the appropriate size (or float if there is only one wavelength parameter)
:param lengthscale: the vector of lengthscales :math:`l_i`. If None then 1.0 is assumed.
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD1: Auto Relevance Determination with respect to wavelength.
If equal to "False" one single wavelength parameter :math:`\lambda_i` for
each dimension is assumed, otherwise there is one lengthscale
parameter per dimension.
:type ARD1: Boolean
:param ARD2: Auto Relevance Determination with respect to lengthscale.
If equal to "False" one single wavelength parameter :math:`l_i` for
each dimension is assumed, otherwise there is one lengthscale
parameter per dimension.
:type ARD2: Boolean
:param active_dims: indices of dimensions which are used in the computation of the kernel
:type active_dims: array or list of the appropriate size
:param name: Name of the kernel for output
:type name: String
:param useGPU: whether or not to use the GPU
:type useGPU: Boolean
"""
def __init__(self, input_dim, variance=1., wavelength=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
super(StdPeriodic, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
self.input_dim = input_dim
self.ARD1 = ARD1 # correspond to wavelengths
self.ARD2 = ARD2 # correspond to lengthscales
self.name = name
if self.ARD1 == False:
if wavelength is not None:
wavelength = np.asarray(wavelength)
assert wavelength.size == 1, "Only one wavelength needed for non-ARD kernel"
else:
wavelength = np.ones(1)
else:
if wavelength is not None:
wavelength = np.asarray(wavelength)
assert wavelength.size == input_dim, "bad number of wavelengths"
else:
wavelength = np.ones(input_dim)
if self.ARD2 == False:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(input_dim)
self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1, "Variance size must be one"
self.wavelengths = Param('wavelengths', wavelength, Logexp())
self.lengthscales = Param('lengthscales', lengthscale, Logexp())
self.link_parameters(self.variance, self.wavelengths, self.lengthscales)
def parameters_changed(self):
"""
This function acts as a callback for each optimization iteration.
If an optimization step was successful and the parameters have
changed, this callback is called so that any precomputations for
the kernel can be updated.
"""
pass
def K(self, X, X2=None):
"""Compute the covariance matrix between X and X2."""
if X2 is None:
X2 = X
base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscales ), axis = -1 ) )
return self.variance * exp_dist
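Not part of this commit — a plain-numpy check of the periodicity implied by the formula above: shifting one input by a whole wavelength leaves the covariance unchanged (illustrative hyperparameter values):
```
import numpy as np

def std_periodic_K(X, X2, variance=1.0, wavelength=2.0, lengthscale=0.5):
    base = np.pi * (X[:, None, :] - X2[None, :, :]) / wavelength
    return variance * np.exp(-0.5 * np.sum(np.square(np.sin(base) / lengthscale), axis=-1))

X = np.random.RandomState(3).randn(4, 1)
assert np.allclose(std_periodic_K(X, X), std_periodic_K(X, X + 2.0))
```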
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
ret = np.empty(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None:
X2 = X
base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
sin_base = np.sin( base )
exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscales ), axis = -1 ) )
dwl = self.variance * (1.0/np.square(self.lengthscales)) * sin_base*np.cos(base) * (base / self.wavelengths)
dl = self.variance * np.square( sin_base) / np.power( self.lengthscales, 3)
self.variance.gradient = np.sum(exp_dist * dL_dK)
#target[0] += np.sum( exp_dist * dL_dK)
if self.ARD1: # different wavelengths
self.wavelengths.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
else: # same wavelengths
self.wavelengths.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)
if self.ARD2: # different lengthscales
self.lengthscales.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
else: # same lengthscales
self.lengthscales.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
self.variance.gradient = np.sum(dL_dKdiag)
self.wavelengths.gradient = 0
self.lengthscales.gradient = 0
# def gradients_X(self, dL_dK, X, X2=None):
# """derivative of the covariance matrix with respect to X."""
#
# raise NotImplemented("Periodic kernel: dK_dX not implemented")
#
# def gradients_X_diag(self, dL_dKdiag, X):
#
# raise NotImplemented("Periodic kernel: dKdiag_dX not implemented")

View file

@ -24,6 +24,13 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_XX(self, dL_dK, X, X2):
if X2 is None:
X2 = X
return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
def gradients_XX_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape)
@ -59,6 +66,9 @@ class White(Static):
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def psi2n(self, Z, variational_posterior):
return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
self.variance.gradient = np.trace(dL_dK)
@ -92,6 +102,11 @@ class Bias(Static):
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret
def psi2n(self, Z, variational_posterior):
ret = np.empty((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance*self.variance
return ret
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()*variational_posterior.shape[0]
@ -120,6 +135,9 @@ class Fixed(Static):
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def psi2n(self, Z, variational_posterior):
return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum()

View file

@ -15,7 +15,7 @@ from ...util.caching import Cache_this
try:
from . import stationary_cython
except ImportError:
print('warning in sationary: failed to import cython module: falling back to numpy')
print('warning in stationary: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false')
@ -25,13 +25,16 @@ class Stationary(Kern):
Stationary covariance functions depend only on r, where r is defined as
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
.. math::
r(x, x') = \\sqrt{ \\sum_{q=1}^Q (x_q - x'_q)^2 }
The covariance function k(x, x') can then be written k(r).
In this implementation, r is scaled by the lengthscales parameter(s):
r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
.. math::
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.
By default, there's only one lengthscale: separate lengthscales for each
dimension can be enabled by setting ARD=True.
@ -39,11 +42,12 @@ class Stationary(Kern):
To implement a stationary covariance function using this class, one need
only define the covariance function k(r), and its derivative.
...
def K_of_r(self, r):
return foo
def dK_dr(self, r):
return bar
```
def K_of_r(self, r):
return foo
def dK_dr(self, r):
return bar
```
The lengthscale(s) and variance parameters are added to the structure automatically.
@ -77,6 +81,10 @@ class Stationary(Kern):
def dK_dr(self, r):
raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
@Cache_this(limit=20, ignore_args=())
def dK2_drdr(self, r):
raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
@Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None):
"""
@ -89,11 +97,16 @@ class Stationary(Kern):
r = self._scaled_dist(X, X2)
return self.K_of_r(r)
@Cache_this(limit=3, ignore_args=())
@Cache_this(limit=20, ignore_args=())
def dK_dr_via_X(self, X, X2):
#a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2))
@Cache_this(limit=3, ignore_args=())
def dK2_drdr_via_X(self, X, X2):
#a convenience function, so we can cache dK2_drdr
return self.dK2_drdr(self._scaled_dist(X, X2))
def _unscaled_dist(self, X, X2=None):
"""
Compute the Euclidean distance between each row of X and X2, or between
@ -114,12 +127,13 @@ class Stationary(Kern):
r2 = np.clip(r2, 0, np.inf)
return np.sqrt(r2)
@Cache_this(limit=5, ignore_args=())
@Cache_this(limit=20, ignore_args=())
def _scaled_dist(self, X, X2=None):
"""
Efficiently compute the scaled distance, r.
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
.. math::
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 / \ell_q^2 }
Note that if thre is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate
@ -201,6 +215,59 @@ class Stationary(Kern):
else:
return self._gradients_X_pure(dL_dK, X, X2)
def gradients_XX(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
..math:
\frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2 [MxQ] (X2 is X if X2 is None)
Thus, we return the second derivative in X2.
"""
# The off diagonals in Q are always zero, this should also be true for the Linear kernel...
# According to multivariable chain rule, we can chain the second derivative through r:
# d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
invdist = self._inv_dist(X, X2)
invdist2 = invdist**2
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp1 = dL_dr * invdist
dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
tmp2 = dL_drdr * invdist2
l2 = np.ones(X.shape[1]) * self.lengthscale**2
if X2 is None:
X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance
else:
tmp1[X==X2.T] -= self.variance
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
return grad
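Not part of this commit — a 1-D finite-difference check of the chain rule used above, d^2k/(dx dx') = dK/dr * d^2r/(dx dx') + d^2K/dr^2 * dr/dx * dr/dx', written directly in numpy for an RBF:
```
import numpy as np

l, sigma2 = 0.8, 1.5
k = lambda x, x2: sigma2 * np.exp(-0.5 * (x - x2) ** 2 / l ** 2)

x, x2, eps = 0.3, -0.4, 1e-4
# mixed second derivative d^2 k / (dx dx2) by central differences
numeric = (k(x + eps, x2 + eps) - k(x + eps, x2 - eps)
           - k(x - eps, x2 + eps) + k(x - eps, x2 - eps)) / (4 * eps ** 2)
# chain rule through r = |x - x2| / l (here x != x2, so d^2r/(dx dx2) = 0)
r = abs(x - x2) / l
K_r = sigma2 * np.exp(-0.5 * r ** 2)
dK_dr, d2K_drdr = -r * K_r, (r ** 2 - 1) * K_r
dr_dx, dr_dx2 = np.sign(x - x2) / l, -np.sign(x - x2) / l
analytic = dK_dr * 0.0 + d2K_drdr * dr_dx * dr_dx2
assert np.isclose(numeric, analytic, atol=1e-5)
```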
def gradients_XX_diag(self, dL_dK, X):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
..math:
\frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ]
"""
return np.ones(X.shape) * self.variance/self.lengthscale**2
def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
@ -259,7 +326,7 @@ class OU(Stationary):
.. math::
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
@ -279,7 +346,7 @@ class Matern32(Stationary):
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
@ -326,7 +393,7 @@ class Matern52(Stationary):
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
"""
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

File diff suppressed because it is too large

View file

@ -4,14 +4,15 @@
import numpy as np
cimport numpy as np
from cython.parallel import prange
cimport cython
ctypedef np.float64_t DTYPE_t
cdef extern from "stationary_utils.h":
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad) nogil
cdef extern from "stationary_utils.h":
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad) nogil
def grad_X(int N, int D, int M,
np.ndarray[DTYPE_t, ndim=2] _X,
@ -22,18 +23,18 @@ def grad_X(int N, int D, int M,
cdef double *X2 = <double*> _X2.data
cdef double *tmp = <double*> _tmp.data
cdef double *grad = <double*> _grad.data
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
with nogil:
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
@cython.cdivision(True)
def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
cdef int n,d,nd,m
for nd in prange(N*D, nogil=True):
n = nd/D
d = nd%D
for nd in prange(N * D, nogil=True):
n = nd / D
d = nd % D
grad[n,d] = 0.0
for m in range(M):
grad[n,d] += tmp[n,m]*(X[n,d]-X2[m,d])
grad[n,d] += tmp[n, m] * (X[n, d] - X2[m, d])
def lengthscale_grads_in_c(int N, int M, int Q,
np.ndarray[DTYPE_t, ndim=2] _tmp,
@ -44,16 +45,16 @@ def lengthscale_grads_in_c(int N, int M, int Q,
cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data
cdef double *grad = <double*> _grad.data
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
with nogil:
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
cdef int q, n, m
cdef double gradq, dist
for q in range(Q):
grad[q] = 0.0
for n in range(N):
for m in range(M):
dist = X[n,q] - X2[m,q]
grad[q] += tmp[n,m]*dist*dist
with nogil:
for q in range(Q):
grad[q] = 0.0
for n in range(N):
for m in range(M):
dist = X[n,q] - X2[m,q]
grad[q] += tmp[n, m] * dist * dist

View file

@ -1,3 +1,5 @@
#ifndef __APPLE__
#include <omp.h>
#endif
void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad);
void _lengthscale_grads(int N, int D, int M, double* X, double* X2, double* tmp, double* grad);

View file

@ -15,7 +15,7 @@ class TruncLinear(Kern):
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
:param input_dim: the number of input dimensions
:type input_dim: int
@ -114,7 +114,7 @@ class TruncLinear_inf(Kern):
.. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q)
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
:param input_dim: the number of input dimensions
:type input_dim: int

View file

@ -1,6 +1,6 @@
from .bernoulli import Bernoulli
from .exponential import Exponential
from .gaussian import Gaussian
from .gaussian import Gaussian, HeteroscedasticGaussian
from .gamma import Gamma
from .poisson import Poisson
from .student_t import StudentT

View file

@ -85,6 +85,7 @@ class Bernoulli(Likelihood):
gh_x, gh_w = gh_points
gh_w = gh_w / np.sqrt(np.pi)
shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
Ysign = np.where(Y==1,1,-1)
@ -232,6 +233,17 @@ class Bernoulli(Likelihood):
np.seterr(**state)
return d3logpdf_dlink3
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
"""
Get the "quantiles" of the binary labels (Bernoulli draws). all the
quantiles must be either 0 or 1, since those are the only values the
draw can take!
"""
p = self.predictive_mean(mu, var)
return [np.asarray(p>(q/100.), dtype=np.int32) for q in quantiles]
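Not part of this commit — a tiny illustration of what these Bernoulli "quantiles" look like for a few predictive probabilities:
```
import numpy as np

p = np.array([0.1, 0.5, 0.99])      # predictive mean P(y=1) for three test points
quantiles = (2.5, 97.5)
qs = [np.asarray(p > (q / 100.), dtype=np.int32) for q in quantiles]
# qs == [array([1, 1, 1]), array([0, 0, 1])]
```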
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -124,7 +124,7 @@ class Exponential(Likelihood):
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3
def samples(self, gp):
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -48,6 +48,7 @@ class Gaussian(Likelihood):
def betaY(self,Y,Y_metadata=None):
#TODO: ~Ricardo this does not live here
raise RuntimeError("Please notify the GPy developers, this should not happen")
return Y/self.gaussian_variance(Y_metadata)
def gaussian_variance(self, Y_metadata=None):
@ -315,9 +316,44 @@ class Gaussian(Likelihood):
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if not isinstance(self.gp_link, link_functions.Identity):
return super(Gaussian, self).variational_expectations(Y=Y, m=m, v=v, gh_points=gh_points, Y_metadata=Y_metadata)
lik_var = float(self.variance)
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
dF_dmu = (Y - m)/lik_var
dF_dv = np.ones_like(v)*(-0.5/lik_var)
dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
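Not part of this commit — a Monte Carlo check of the closed form above, E_{N(f;m,v)}[log N(y; f, s2)] = -0.5*log(2*pi*s2) - 0.5*((y-m)^2 + v)/s2, with illustrative numbers:
```
import numpy as np

rng = np.random.RandomState(4)
y, m, v, s2 = 1.2, 0.5, 0.3, 0.8
f = m + np.sqrt(v) * rng.randn(200000)     # samples of the latent function value
mc = np.mean(-0.5 * np.log(2 * np.pi * s2) - 0.5 * (y - f) ** 2 / s2)
closed = -0.5 * np.log(2 * np.pi * s2) - 0.5 * ((y - m) ** 2 + v) / s2
assert abs(mc - closed) < 1e-2
```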
class HeteroscedasticGaussian(Gaussian):
def __init__(self, Y_metadata, gp_link=None, variance=1., name='het_Gauss'):
if gp_link is None:
gp_link = link_functions.Identity()
if not isinstance(gp_link, link_functions.Identity):
print("Warning, Exact inference is not implemented for non-identity link functions,\
if you are not already, ensure Laplace inference_method is used")
super(HeteroscedasticGaussian, self).__init__(gp_link, np.ones(Y_metadata['output_index'].shape)*variance, name)
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
return dL_dKdiag[Y_metadata['output_index']]
def gaussian_variance(self, Y_metadata=None):
return self.variance[Y_metadata['output_index'].flatten()]
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
_s = self.variance[Y_metadata['output_index'].flatten()]
if full_cov:
if var.ndim == 2:
var += np.eye(var.shape[0])*_s
if var.ndim == 3:
var += np.atleast_3d(np.eye(var.shape[0])*_s)
else:
var += _s
return mu, var
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
_s = self.variance[Y_metadata['output_index'].flatten()]
return [stats.norm.ppf(q/100.)*np.sqrt(var + _s) + mu for q in quantiles]
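The closed-form variational_expectations added for the identity-link Gaussian earlier in this hunk is E_{N(f|m,v)}[log N(y|f, sigma^2)] = -0.5*log(2*pi*sigma^2) - 0.5*((y-m)^2 + v)/sigma^2, which is what the F expression expands to. A quick Gauss-Hermite cross-check of that identity (standalone sketch, all names hypothetical):

import numpy as np

def var_exp_closed_form(y, m, v, lik_var):
    return (-0.5 * np.log(2 * np.pi) - 0.5 * np.log(lik_var)
            - 0.5 * (y ** 2 + m ** 2 + v - 2 * m * y) / lik_var)

def var_exp_quadrature(y, m, v, lik_var, degree=30):
    gh_x, gh_w = np.polynomial.hermite.hermgauss(degree)
    f = m + np.sqrt(2 * v) * gh_x                          # quadrature nodes of N(m, v)
    logp = -0.5 * np.log(2 * np.pi * lik_var) - 0.5 * (y - f) ** 2 / lik_var
    return (gh_w / np.sqrt(np.pi) * logp).sum()

print(var_exp_closed_form(0.3, 0.1, 0.2, 0.5))   # the two agree to several decimals
print(var_exp_quadrature(0.3, 0.1, 0.2, 0.5))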

View file

@ -607,7 +607,7 @@ class Likelihood(Parameterized):
pred_mean = self.predictive_mean(mu, var, Y_metadata=Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata=Y_metadata)
except NotImplementedError:
print "Finding predictive mean and variance via sampling rather than quadrature"
print("Finding predictive mean and variance via sampling rather than quadrature")
Nf_samp = 300
Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
@ -622,7 +622,7 @@ class Likelihood(Parameterized):
Nf_samp = 300
Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
ss_y = self.samples(s, Y_metadata)#, samples=Ny_samp)
#ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp)
pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
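The fallback above estimates predictive quantiles by drawing latent function values from N(mu, var), pushing them through the likelihood's sampler, and taking percentiles. A compact sketch of that idea (sample_y stands in for the likelihood's samples method and is hypothetical):

import numpy as np

def sampled_quantiles(mu, var, sample_y, quantiles=(2.5, 97.5), n_f=300):
    s = np.random.randn(mu.shape[0], n_f) * np.sqrt(var) + mu  # latent function draws
    ss_y = sample_y(s)                                         # observation draws
    return [np.percentile(ss_y, q, axis=1)[:, None] for q in quantiles]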

View file

@ -2,6 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import scipy
from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
import scipy as sp
from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
@ -140,7 +141,7 @@ class Log_ex_1(GPTransformation):
"""
def transf(self,f):
return np.log1p(safe_exp(f))
return scipy.special.log1p(safe_exp(f))
def dtransf_df(self,f):
ef = safe_exp(f)

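The transf above is the softplus log(1 + exp(f)); the change only routes log1p through scipy.special instead of numpy, with safe_exp guarding the exponential. For reference, an overflow-free way to compute the same value without an explicit exp (illustration only, not what the commit uses):

import numpy as np

f = np.array([-50.0, 0.0, 50.0])
print(np.logaddexp(0.0, f))   # log(1 + exp(f)) == softplus, stable for large |f|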
View file

@ -145,5 +145,7 @@ class Poisson(Likelihood):
"""
orig_shape = gp.shape
gp = gp.flatten()
# Ysim = np.random.poisson(self.gp_link.transf(gp), [samples, gp.size]).T
# return Ysim.reshape(orig_shape+(samples,))
Ysim = np.random.poisson(self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)

View file

@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba
import logging
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.observable_array import ObsAr
class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
"""
@ -80,46 +81,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw):
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw)
if self.has_uncertain_inputs():
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
variational_posterior=X,
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
else:
current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], X, Z)
current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], X)
if subset_indices is not None:
value_indices['Xgrad'] = subset_indices['samples']
kl_fctr = self.kl_factr
if self.has_uncertain_inputs():
if self.missing_data:
d = self.output_dim
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
else:
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
# Subsetting Variational Posterior objects, makes the gradients
# empty. We need them to be 0 though:
X.mean.gradient[:] = 0
X.variance.gradient[:] = 0
self.variational_prior.update_gradients_KL(X)
if self.missing_data:
current_values['meangrad'] += kl_fctr*X.mean.gradient/d
current_values['vargrad'] += kl_fctr*X.variance.gradient/d
else:
current_values['meangrad'] += kl_fctr*X.mean.gradient
current_values['vargrad'] += kl_fctr*X.variance.gradient
if subset_indices is not None:
value_indices['meangrad'] = subset_indices['samples']
value_indices['vargrad'] = subset_indices['samples']
return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2, **kw)
return posterior, log_marginal_likelihood, grad_dict
def _outer_values_update(self, full_values):
"""
@ -128,22 +93,47 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
"""
super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values)
if self.has_uncertain_inputs():
self.X.mean.gradient = full_values['meangrad']
self.X.variance.gradient = full_values['vargrad']
meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations(
variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'],
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
self.X.mean.gradient = meangrad_tmp
self.X.variance.gradient = vargrad_tmp
else:
self.X.gradient = full_values['Xgrad']
self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)
def _outer_init_full_values(self):
if self.has_uncertain_inputs():
return dict(meangrad=np.zeros(self.X.mean.shape),
vargrad=np.zeros(self.X.variance.shape))
else:
return dict(Xgrad=np.zeros(self.X.shape))
return super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
def parameters_changed(self):
super(BayesianGPLVMMiniBatch,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch):
return
kl_fctr = self.kl_factr
if kl_fctr > 0:
Xgrad = self.X.gradient.copy()
self.X.gradient[:] = 0
self.variational_prior.update_gradients_KL(self.X)
if self.missing_data or not self.stochastics:
self.X.mean.gradient = kl_fctr*self.X.mean.gradient
self.X.variance.gradient = kl_fctr*self.X.variance.gradient
else:
d = self.output_dim
self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
self.X.gradient += Xgrad
if self.missing_data or not self.stochastics:
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
elif self.stochastics:
d = self.output_dim
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
self._Xgrad = self.X.gradient.copy()
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,

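The stochastic branch of parameters_changed above rescales the KL term by batchsize/output_dim, so that one full sweep over the output columns counts the KL exactly once rather than once per minibatch. A toy check of that bookkeeping (numbers purely illustrative):

output_dim, batchsize, KL = 12, 3, 7.5

per_batch = KL * batchsize / output_dim     # KL contribution added for one minibatch
n_batches = output_dim / batchsize          # minibatches in one sweep over the outputs
print(per_batch * n_batches == KL)          # True: the full KL is counted once per sweep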
View file

@ -1,11 +1,11 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from ..models import GPLVM
from ..mappings import *
from . import GPLVM
from .. import mappings
class BCGPLVM(GPLVM):
@ -16,33 +16,31 @@ class BCGPLVM(GPLVM):
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
:param mapping: mapping for back constraint
:type mapping: GPy.core.Mapping object
"""
def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False, mapping=None):
def __init__(self, Y, input_dim, kernel=None, mapping=None):
if mapping is None:
mapping = Kernel(X=Y, output_dim=input_dim)
mapping = mappings.MLP(input_dim=Y.shape[1],
output_dim=input_dim,
hidden_dim=10)
else:
assert mapping.input_dim==Y.shape[1], "mapping input dim does not work for Y dimension"
assert mapping.output_dim==input_dim, "mapping output dim does not work for self.input_dim"
GPLVM.__init__(self, Y, input_dim, X=mapping.f(Y), kernel=kernel, name="bcgplvm")
self.unlink_parameter(self.X)
self.mapping = mapping
GPLVM.__init__(self, Y, input_dim, init, X, kernel, normalize_Y)
self.X = self.mapping.f(self.likelihood.Y)
self.link_parameter(self.mapping)
def _get_param_names(self):
return self.mapping._get_param_names() + GP._get_param_names(self)
self.X = self.mapping.f(self.Y)
def _get_params(self):
return np.hstack((self.mapping._get_params(), GP._get_params(self)))
def parameters_changed(self):
self.X = self.mapping.f(self.Y)
GP.parameters_changed(self)
Xgradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
self.mapping.update_gradients(Xgradient, self.Y)
def _set_params(self, x):
self.mapping._set_params(x[:self.mapping.num_params])
self.X = self.mapping.f(self.likelihood.Y)
GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self):
dL_df = self.kern.gradients_X(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -16,6 +16,8 @@ class GPHeteroscedasticRegression(GP):
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf
NB: This model does not infer the noise variance outside the training set
"""
def __init__(self, X, Y, kernel=None, Y_metadata=None):
@ -30,10 +32,7 @@ class GPHeteroscedasticRegression(GP):
kernel = kern.RBF(X.shape[1])
#Likelihood
#likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in range(Ny)]
noise_terms = np.unique(Y_metadata['output_index'].flatten())
likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in noise_terms]
likelihood = likelihoods.MixedNoise(likelihoods_list=likelihoods_list)
likelihood = likelihoods.HeteroscedasticGaussian(Y_metadata)
super(GPHeteroscedasticRegression, self).__init__(X,Y,kernel,likelihood, Y_metadata=Y_metadata)
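With the switch above, all per-point noise terms live in a single HeteroscedasticGaussian likelihood indexed through Y_metadata['output_index'] rather than in a MixedNoise list. A hedged usage sketch, assuming the constructor accepts Y_metadata shaped as in the diff (one noise index per training point):

import numpy as np
import GPy

X = np.linspace(0, 10, 40)[:, None]
noise_sd = np.linspace(0.05, 0.5, 40)[:, None]            # hypothetical per-point noise level
Y = np.sin(X) + np.random.randn(40, 1) * noise_sd
Y_metadata = {'output_index': np.arange(40)[:, None]}     # one noise term per point

m = GPy.models.GPHeteroscedasticRegression(X, Y, Y_metadata=Y_metadata)
m.optimize()
mu, var = m.predict(X, Y_metadata=Y_metadata)  # as noted above, the noise is only defined at the training inputs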

View file

@ -1,5 +1,5 @@
# Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General public License, see LICENSE.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.model import Model

View file

@ -26,12 +26,12 @@ class GPRegression(GP):
"""
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.):
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1., mean_function=None):
if kernel is None:
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian(variance=noise_var)
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)

View file

@ -1,20 +1,17 @@
# Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General public License, see LICENSE.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats
from scipy.special import erf
from ..core.model import Model
from ..core import GP
from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv
from ..likelihoods import Gaussian
from ..inference.latent_function_inference import VarGauss
log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model):
class GPVariationalGaussianApproximation(GP):
"""
The Variational Gaussian Approximation revisited
@ -26,70 +23,14 @@ class GPVariationalGaussianApproximation(Model):
pages = {786--792},
}
"""
def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
Model.__init__(self,'Variational GP')
if likelihood is None:
likelihood = Gaussian()
# accept the construction arguments
self.X = ObsAr(X)
self.Y = Y
self.num_data, self.input_dim = self.X.shape
self.Y_metadata = Y_metadata
def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):
self.kern = kernel
self.likelihood = likelihood
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
num_data = Y.shape[0]
self.alpha = Param('alpha', np.zeros((num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(num_data))
inf = VarGauss(self.alpha, self.beta)
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)
self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(self.num_data))
self.link_parameter(self.alpha)
self.link_parameter(self.beta)
def log_likelihood(self):
return self._log_lik
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
self._log_lik = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
def _raw_predict(self, Xnew):
"""
Predict the function(s) at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
"""
Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
Kux = self.kern.K(self.X, Xnew)
mu = np.dot(Kux.T, self.alpha)
WiKux = np.dot(Wi, Kux)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
return mu, var.reshape(-1,1)
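The removed inline inference above built the variational posterior covariance as Sigma = diag(beta**-2) - Ai/(beta beta^T) with A = I + B K B, which by the Woodbury identity equals (K^-1 + diag(beta)^2)^-1. A small numeric check of that identity (illustrative sizes only):

import numpy as np

rng = np.random.RandomState(0)
n = 5
L = rng.randn(n, n)
K = L.dot(L.T) + n * np.eye(n)                             # a positive definite 'kernel' matrix
beta = rng.rand(n) + 0.5

A = np.eye(n) + (K * beta[:, None]) * beta[None, :]        # I + B K B with B = diag(beta)
Ai = np.linalg.inv(A)
Sigma = np.diag(beta ** -2) - Ai / beta[:, None] / beta[None, :]

Sigma_direct = np.linalg.inv(np.linalg.inv(K) + np.diag(beta ** 2))
print(np.allclose(Sigma, Sigma_direct))                    # True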

View file

@ -36,6 +36,7 @@ class GPLVM(GP):
likelihood = Gaussian()
super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM')
self.X = Param('latent_mean', X)
self.link_parameter(self.X, index=0)
@ -43,27 +44,30 @@ class GPLVM(GP):
super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
def jacobian(self,X):
J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim):
J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
return J
#def jacobian(self,X):
# J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
# for i in range(self.output_dim):
# J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
# return J
def magnification(self,X):
target=np.zeros(X.shape[0])
#J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
J = self.jacobian(X)
for i in range(X.shape[0]):
target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
return target
#def magnification(self,X):
# target=np.zeros(X.shape[0])
# #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
## J = self.jacobian(X)
# for i in range(X.shape[0]):
# target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
# return target
def plot(self):
assert self.likelihood.Y.shape[1] == 2
pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable
assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent"
from matplotlib import pyplot as plt
plt.scatter(self.Y[:, 0],
self.Y[:, 1],
40, self.X[:, 0].copy(),
linewidth=0, cmap=plt.cm.jet)
Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
mu, _ = self.predict(Xnew)
import pylab as pb
pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
plt.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
@ -78,6 +82,3 @@ class GPLVM(GP):
resolution, ax, marker, s,
fignum, False, legend,
plot_limits, aspect, updates, **kwargs)
def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs)

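The commented-out jacobian/magnification pair above computed, for each point, the magnification factor sqrt(det(J J^T)) from the Jacobian of the posterior mean (a role now played by predict_magnification in the plotting code later in this commit). Given a precomputed Jacobian J of shape (N, Q, D), a vectorized sketch of that determinant is:

import numpy as np

def magnification_from_jacobian(J):
    # J: (N, Q, D); returns sqrt(det(J_i J_i^T)) for every point i
    JJT = np.einsum('nqd,npd->nqp', J, J)       # (N, Q, Q) batch of Gram matrices
    return np.sqrt(np.linalg.det(JJT))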
View file

@ -251,7 +251,7 @@ class HessianChecker(GradientChecker):
print(grad_string)
if plot:
import pylab as pb
from matplotlib import pyplot as pb
fig, axes = pb.subplots(2, 2)
max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))

View file

@ -170,20 +170,19 @@ class MRD(BayesianGPLVMMiniBatch):
self._log_marginal_likelihood += b._log_marginal_likelihood
self.logger.info('working on im <{}>'.format(hex(id(i))))
self.Z.gradient[:] += b.full_values['Zgrad']
grad_dict = b.full_values
self.Z.gradient[:] += b.Z.gradient#full_values['Zgrad']
#grad_dict = b.full_values
if self.has_uncertain_inputs():
self.X.mean.gradient += grad_dict['meangrad']
self.X.variance.gradient += grad_dict['vargrad']
self.X.gradient += b._Xgrad
else:
self.X.gradient += grad_dict['Xgrad']
self.X.gradient += b._Xgrad
if self.has_uncertain_inputs():
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
pass
#if self.has_uncertain_inputs():
# # update for the KL divergence
# self.variational_prior.update_gradients_KL(self.X)
# self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
# pass
def log_likelihood(self):
return self._log_marginal_likelihood

View file

@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP):
if stochastic and missing_data:
self.missing_data = True
self.stochastics = SparseGPStochastics(self, batchsize)
self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
elif stochastic and not missing_data:
self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize)
self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
elif missing_data:
self.missing_data = True
self.stochastics = SparseGPMissing(self)
@ -80,7 +80,7 @@ class SparseGPMiniBatch(SparseGP):
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs):
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs):
"""
This is the standard part, which usually belongs in parameters_changed.
@ -99,47 +99,13 @@ class SparseGPMiniBatch(SparseGP):
like them into this dictionary for inner use of the indices inside the
algorithm.
"""
try:
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs)
except:
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
current_values = {}
likelihood.update_gradients(grad_dict['dL_dthetaL'])
current_values['likgrad'] = likelihood.gradient.copy()
if subset_indices is None:
subset_indices = {}
if isinstance(X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = grad_dict['dL_dKmm']
kern.update_gradients_full(dL_dKmm, Z, None)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_expectations(variational_posterior=X,
Z=Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
current_values['Zgrad'] += kern.gradients_Z_expectations(
grad_dict['dL_dpsi0'],
grad_dict['dL_dpsi1'],
grad_dict['dL_dpsi2'],
Z=Z,
variational_posterior=X)
if psi2 is None:
psi2_sum_n = None
else:
#gradients wrt kernel
kern.update_gradients_diag(grad_dict['dL_dKdiag'], X)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z)
current_values['kerngrad'] += kern.gradient
kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
psi2_sum_n = psi2.sum(axis=0)
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
return posterior, log_marginal_likelihood, grad_dict
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
"""
@ -173,7 +139,10 @@ class SparseGPMiniBatch(SparseGP):
else:
index = slice(None)
if key in full_values:
full_values[key][index] += current_values[key]
try:
full_values[key][index] += current_values[key]
except:
full_values[key] += current_values[key]
else:
full_values[key] = current_values[key]
@ -192,9 +161,41 @@ class SparseGPMiniBatch(SparseGP):
Here you put the values, which were collected before in the right places.
E.g. set the gradients of parameters, etc.
"""
self.likelihood.gradient = full_values['likgrad']
self.kern.gradient = full_values['kerngrad']
self.Z.gradient = full_values['Zgrad']
if self.has_uncertain_inputs():
#gradients wrt kernel
dL_dKmm = full_values['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
kgrad = self.kern.gradient.copy()
self.kern.update_gradients_expectations(
variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'])
self.kern.gradient += kgrad
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'])
else:
#gradients wrt kernel
self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
kgrad = self.kern.gradient.copy()
self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
kgrad += self.kern.gradient
self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
self.kern.gradient += kgrad
#kgrad += self.kern.gradient
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)
self.likelihood.update_gradients(full_values['dL_dthetaL'])
def _outer_init_full_values(self):
"""
@ -209,7 +210,15 @@ class SparseGPMiniBatch(SparseGP):
to initialize the gradients for the mean and the variance in order to
have the full gradient for indexing)
"""
return {}
retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0])))
if self.has_uncertain_inputs():
retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]),
dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])),
dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))))
else:
retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]),
'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))})
return retd
def _outer_loop_for_missing_data(self):
Lm = None
@ -231,28 +240,36 @@ class SparseGPMiniBatch(SparseGP):
print(message, end=' ')
for d, ninan in self.stochastics.d:
if not self.stochastics:
print(' '*(len(message)) + '\r', end=' ')
message = m_f(d)
print(message, end=' ')
posterior, log_marginal_likelihood, \
grad_dict, current_values, value_indices = self._inner_parameters_changed(
psi0ni = self.psi0[ninan]
psi1ni = self.psi1[ninan]
if self.has_uncertain_inputs():
psi2ni = self.psi2[ninan]
value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan)
else:
psi2ni = None
value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)
posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
self.kern, self.X[ninan],
self.Z, self.likelihood,
self.Y_normalized[ninan][:, d], self.Y_metadata,
Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan))
psi0=psi0ni, psi1=psi1ni, psi2=psi2ni)
self._inner_take_over_or_update(self.full_values, current_values, value_indices)
self._inner_values_update(current_values)
# Fill out the full values by adding in the appropriate grad_dict
# values
self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
self._inner_values_update(grad_dict) # What is this for? -> MRD
Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
woodbury_vector[:, d] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics:
print('')
@ -260,10 +277,10 @@ class SparseGPMiniBatch(SparseGP):
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
self._outer_values_update(self.full_values)
if self.has_uncertain_inputs():
self.kern.return_psi2_n = False
def _outer_loop_without_missing_data(self):
self._log_marginal_likelihood = 0
if self.posterior is None:
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
@ -271,17 +288,16 @@ class SparseGPMiniBatch(SparseGP):
woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector
d = self.stochastics.d
posterior, log_marginal_likelihood, \
grad_dict, self.full_values, _ = self._inner_parameters_changed(
d = self.stochastics.d[0][0]
posterior, log_marginal_likelihood, grad_dict= self._inner_parameters_changed(
self.kern, self.X,
self.Z, self.likelihood,
self.Y_normalized[:, d], self.Y_metadata)
self.grad_dict = grad_dict
self._log_marginal_likelihood += log_marginal_likelihood
self._log_marginal_likelihood = log_marginal_likelihood
self._outer_values_update(self.full_values)
self._outer_values_update(self.grad_dict)
woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
woodbury_vector[:, d] = posterior.woodbury_vector
@ -290,10 +306,23 @@ class SparseGPMiniBatch(SparseGP):
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
def parameters_changed(self):
#Compute the psi statistics for N once, but don't sum out N in psi2
if self.has_uncertain_inputs():
#psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
#psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
#psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
self.psi0 = self.kern.psi0(self.Z, self.X)
self.psi1 = self.kern.psi1(self.Z, self.X)
self.psi2 = self.kern.psi2n(self.Z, self.X)
else:
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.X, self.Z)
self.psi2 = None
if self.missing_data:
self._outer_loop_for_missing_data()
elif self.stochastics:
self._outer_loop_without_missing_data()
else:
self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self._outer_values_update(self.full_values)
self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self._outer_values_update(self.grad_dict)
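With the restructuring above, the psi statistics are computed once per parameters_changed and, when the inputs carry no uncertainty, they reduce to ordinary kernel evaluations. A minimal illustration of that correspondence with a standard GPy RBF kernel (shapes assume 10 data points and 3 inducing inputs):

import numpy as np
import GPy

X = np.random.randn(10, 2)
Z = np.random.randn(3, 2)
kern = GPy.kern.RBF(2)

psi0 = kern.Kdiag(X)     # (10,)   per-point prior variance
psi1 = kern.K(X, Z)      # (10, 3) cross-covariance with the inducing inputs
# with uncertain inputs these become expectations of Kdiag and K under q(X)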

View file

@ -4,4 +4,8 @@
try:
from . import matplot_dep
except (ImportError, NameError):
print('Fail to load GPy.plotting.matplot_dep.')
# Matplotlib not available
import warnings
warnings.warn(ImportWarning("Matplotlib not available, install newest version of Matplotlib for plotting"))
#sys.modules['matplotlib'] =
#sys.modules[__name__+'.matplot_dep'] = ImportWarning("Matplotlib not available, install newest version of Matplotlib for plotting")

View file

@ -3,7 +3,7 @@
import matplotlib as mpl
import pylab as pb
from matplotlib import pyplot as pb
import sys
#sys.path.append('/home/james/mlprojects/sitran_cluster/')
#from switch_pylab_backend import *
@ -159,7 +159,7 @@ cdict_Alu = {'red' :((0./5,colorsRGB['Aluminium1'][0]/256.,colorsRGB['Aluminium1
# cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
# cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
if __name__=='__main__':
import pylab as pb
from matplotlib import pyplot as pb
pb.figure()
pb.pcolor(pb.rand(10,10),cmap=cmap_RB)
pb.colorbar()

View file

@ -3,8 +3,8 @@
try:
import Tango
import pylab as pb
#import Tango
from matplotlib import pyplot as pb
except:
pass
import numpy as np
@ -17,11 +17,11 @@ def ax_default(fignum, ax):
fig = ax.figure
return fig, ax
def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw):
def meanplot(x, mu, color='#3300FF', ax=None, fignum=None, linewidth=2,**kw):
_, axes = ax_default(fignum, ax)
return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
def gpplot(x, mu, lower, upper, edgecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue'], ax=None, fignum=None, **kwargs):
def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, fignum=None, **kwargs):
_, axes = ax_default(fignum, ax)
mu = mu.flatten()
@ -47,6 +47,32 @@ def gpplot(x, mu, lower, upper, edgecol=Tango.colorsHex['darkBlue'], fillcol=Tan
return plots
def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs):
_, axes = ax_default(fignum, ax)
mu = mu.flatten()
x = x.flatten()
lower = lower.flatten()
upper = upper.flatten()
plots = []
if edgecol is None:
edgecol='#3300FF'
if not 'alpha' in kwargs.keys():
kwargs['alpha'] = 1.
if not 'lw' in kwargs.keys():
kwargs['lw'] = 1.
plots.append(axes.errorbar(x,mu,yerr=np.vstack([mu-lower,upper-mu]),color=edgecol,**kwargs))
plots[-1][0].remove()
return plots
def removeRightTicks(ax=None):
ax = ax or pb.gca()
for i, line in enumerate(ax.get_yticklines()):

View file

@ -9,7 +9,8 @@ import itertools
try:
import Tango
from matplotlib.cm import get_cmap
import pylab as pb
from matplotlib import pyplot as pb
from matplotlib import cm
except:
pass
@ -114,7 +115,7 @@ def plot_latent(model, labels=None, which_indices=None,
# create a function which computes the shading of latent space according to the output variance
def plot_function(x):
Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
Xtest_full = np.zeros((x.shape[0], X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x
_, var = model.predict(Xtest_full, **predict_kwargs)
var = var[:, :1]
@ -137,7 +138,7 @@ def plot_latent(model, labels=None, which_indices=None,
view = ImshowController(ax, plot_function,
(xmin, ymin, xmax, ymax),
resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.binary, **imshow_kwargs)
cmap=cm.binary, **imshow_kwargs)
# make sure labels are in order of input:
labels = np.asarray(labels)
@ -192,17 +193,18 @@ def plot_latent(model, labels=None, which_indices=None,
if updates:
try:
ax.figure.canvas.show()
fig.canvas.show()
except Exception as e:
print("Could not invoke show: {}".format(e))
raw_input('Enter to continue')
view.deactivate()
#raw_input('Enter to continue')
return view
return ax
def plot_magnification(model, labels=None, which_indices=None,
resolution=60, ax=None, marker='o', s=40,
fignum=None, plot_inducing=False, legend=True,
aspect='auto', updates=False):
plot_limits=None,
aspect='auto', updates=False, mean=True, covariance=True, kern=None):
"""
:param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
:param resolution: the resolution of the grid on which to evaluate the predictive variance
@ -210,6 +212,8 @@ def plot_magnification(model, labels=None, which_indices=None,
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
else:
fig = ax.figure
Tango.reset()
if labels is None:
@ -217,19 +221,90 @@ def plot_magnification(model, labels=None, which_indices=None,
input_1, input_2 = most_significant_input_dimensions(model, which_indices)
# first, plot the output variance as a function of the latent space
Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution)
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
#fetch the data points X that we'd like to plot
X = model.X
if isinstance(X, VariationalPosterior):
X = X.mean
else:
X = X
if X.shape[0] > 1000:
print("Warning: subsampling X, as it has more than 1000 samples. X.shape={!s}".format(X.shape))
subsample = np.random.choice(X.shape[0], size=1000, replace=False)
X = X[subsample]
labels = labels[subsample]
#=======================================================================
# <<<WORK IN PROGRESS>>>
# <<<DO NOT DELETE>>>
# plt.close('all')
# fig, ax = plt.subplots(1,1)
# from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions
# import matplotlib.patches as mpatches
# i1, i2 = most_significant_input_dimensions(m, None)
# xmin, xmax = 100, -100
# ymin, ymax = 100, -100
# legend_handles = []
#
# X = m.X.mean[:, [i1, i2]]
# X = m.X.variance[:, [i1, i2]]
#
# xmin = X[:,0].min(); xmax = X[:,0].max()
# ymin = X[:,1].min(); ymax = X[:,1].max()
# range_ = [[xmin, xmax], [ymin, ymax]]
# ul = np.unique(labels)
#
# for i, l in enumerate(ul):
# #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])],
# # green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])],
# # blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])],
# # alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)])
# #cmap = LinearSegmentedColormap('{}'.format(l), cdict)
# cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255)
# cmap._init()
# #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
# alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
# cmap._lut[:, -1] = alphas
# print l
# x, y = X[labels==l].T
#
# heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_)
# #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
#
# im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l))
# legend_handles.append(mpatches.Patch(color=colors[i], label=l))
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
# plt.legend(legend_handles, [l.get_label() for l in legend_handles])
# plt.draw()
# plt.show()
#=======================================================================
#Create an IMshow controller that can re-plot the latent space shading at a good resolution
if plot_limits is None:
xmin, ymin = X[:, [input_1, input_2]].min(0)
xmax, ymax = X[:, [input_1, input_2]].max(0)
x_r, y_r = xmax-xmin, ymax-ymin
xmin -= .1*x_r
xmax += .1*x_r
ymin -= .1*y_r
ymax += .1*y_r
else:
try:
xmin, xmax, ymin, ymax = plot_limits
except (TypeError, ValueError) as e:
raise e.__class__("Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits))
def plot_function(x):
Xtest_full = np.zeros((x.shape[0], X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x
mf=model.magnification(Xtest_full)
mf = model.predict_magnification(Xtest_full, kern=kern, mean=mean, covariance=covariance)
return mf
view = ImshowController(ax, plot_function,
tuple(model.X.min(0)[:, [input_1, input_2]]) + tuple(model.X.max(0)[:, [input_1, input_2]]),
(xmin, ymin, xmax, ymax),
resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.gray)
cmap=cm.get_cmap('Greys'))
# make sure labels are in order of input:
ulabels = []
@ -245,17 +320,17 @@ def plot_magnification(model, labels=None, which_indices=None,
elif type(ul) is np.int64:
this_label = 'class %i' % ul
else:
this_label = 'class %i' % i
this_label = unicode(ul)
m = marker.next()
index = np.nonzero(labels == ul)[0]
if model.input_dim == 1:
x = model.X[index, input_1]
x = X[index, input_1]
y = np.zeros(index.size)
else:
x = model.X[index, input_1]
y = model.X[index, input_2]
ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
x = X[index, input_1]
y = X[index, input_2]
ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.2, edgecolor='k', alpha=.9)
ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2)
@ -263,19 +338,29 @@ def plot_magnification(model, labels=None, which_indices=None,
if not np.all(labels == 1.) and legend:
ax.legend(loc=0, numpoints=1)
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
ax.grid(b=False) # remove the grid if present, it doesn't look good
ax.set_aspect('auto') # set a nice aspect ratio
ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax))
if plot_inducing:
ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w')
if plot_inducing and hasattr(model, 'Z'):
Z = model.Z
ax.scatter(Z[:, input_1], Z[:, input_2], c='w', s=18, marker="^", edgecolor='k', linewidth=.3, alpha=.7)
try:
fig.canvas.draw()
fig.tight_layout()
fig.canvas.draw()
except Exception as e:
print("Could not invoke tight layout: {}".format(e))
pass
if updates:
fig.canvas.show()
raw_input('Enter to continue')
pb.title('Magnification Factor')
try:
fig.canvas.draw()
fig.canvas.show()
except Exception as e:
print("Could not invoke show: {}".format(e))
#raw_input('Enter to continue')
return view
return ax
@ -314,8 +399,8 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
this_label = 'class %i' % i
m = marker.next()
index = np.nonzero(data_labels == ul)[0]
x = model.X[index, input_1]
y = model.X[index, input_2]
x = X[index, input_1]
y = X[index, input_2]
ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1)
@ -323,7 +408,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
controller = ImAnnotateController(ax,
plot_function,
tuple(model.X.min(0)[:, significant_dims]) + tuple(model.X.max(0)[:, significant_dims]),
tuple(X.min(0)[:, significant_dims]) + tuple(X.max(0)[:, significant_dims]),
resolution=resolution,
aspect=aspect,
cmap=get_cmap('jet'),

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import pylab as pb
from matplotlib import pyplot as pb
except:
pass
#import numpy as np

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from matplotlib import pyplot as pb
import Tango
from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy

View file

@ -9,6 +9,9 @@ class AxisEventController(object):
def __init__(self, ax):
self.ax = ax
self.activate()
def __del__(self):
self.deactivate()
return self
def deactivate(self):
for cb_class in self.ax.callbacks.callbacks.values():
for cb_num in cb_class.keys():

View file

@ -4,7 +4,7 @@
import numpy as np
try:
import Tango
import pylab as pb
from matplotlib import pyplot as pb
except:
pass
from base_plots import x_frame1D, x_frame2D

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
try:
import pylab as pb
from matplotlib import pyplot as pb
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
#from matplotlib import cm

View file

@ -1,25 +1,82 @@
# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import Tango
import pylab as pb
except:
pass
import numpy as np
from base_plots import gpplot, x_frame1D, x_frame2D
from . import Tango
from .base_plots import gpplot, x_frame1D, x_frame2D,gperrors
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from ...models.warped_gp import WarpedGP
from scipy import sparse
from ...core.parameterization.variational import VariationalPosterior
from matplotlib import pyplot as plt
def plot_data(model, which_data_rows='all',
which_data_ycols='all', visible_dims=None,
fignum=None, ax=None, data_symbol='kx',mew=1.5):
"""
Plot the training data
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
Can plot only part of the data
using which_data_rows and which_data_ycols.
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param visible_dims: an array specifying the input dimensions to plot (maximum two)
:type visible_dims: a numpy array
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
"""
#deal with optional arguments
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(model.output_dim)
if ax is None:
fig = plt.figure(num=fignum)
ax = fig.add_subplot(111)
#data
X = model.X
Y = model.Y
#work out what the inputs are for plotting (1D or 2D)
if visible_dims is None:
visible_dims = np.arange(model.input_dim)
assert visible_dims.size <= 2, "Can only plot up to two visible input dimensions"
free_dims = visible_dims
plots = {}
#one dimensional plotting
if len(free_dims) == 1:
for d in which_data_ycols:
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=mew)
#2D plotting
elif len(free_dims) == 2:
for d in which_data_ycols:
plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40,
Y[which_data_rows, d], cmap=plt.cm.jet, vmin=Y.min(), vmax=Y.max(), linewidth=0.)
else:
raise NotImplementedError("Cannot define a frame with more than two input dimensions")
return plots
def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None):
apply_link=False, samples_y=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -37,25 +94,32 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
:type which_data_rows: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use
:type levels: int
:param samples: the number of a posteriori samples to plot p(y*|y)
:param samples: the number of a posteriori samples to plot p(f*|y)
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param plot_raw: Whether to plot the raw function p(f|y)
:type plot_raw: boolean
:param linecol: color of line to plot.
:type linecol:
:type linecol: hex or color
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:param apply_link: apply the link function if plotting f (default false)
:type fillcol: hex or color
:param apply_link: apply the link function if plotting f (default false), as well as posterior samples if requested
:type apply_link: boolean
:param samples_f: the number of a posteriori f samples to plot p(f*|y)
:type samples_f: int
:param samples_y: the number of a posteriori y samples to plot p(y*|y)
:type samples_y: int
:param plot_uncertain_inputs: plot the uncertainty of the inputs as error bars if they have uncertainty (BGPLVM etc.)
:type plot_uncertain_inputs: boolean
:param predict_kw: keyword args for _raw_predict and predict functions if required
:type predict_kw: dict
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
"""
#deal with optional arguments
if which_data_rows == 'all':
@ -65,7 +129,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#if len(which_data_ycols)==0:
#raise ValueError('No data selected for plotting')
if ax is None:
fig = pb.figure(num=fignum)
fig = plt.figure(num=fignum)
ax = fig.add_subplot(111)
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
@ -117,31 +181,38 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
if isinstance(model, WarpedGP):
m, v = model.predict(Xgrid, full_cov=False, median=True, Y_metadata=Y_metadata, **predict_kw)
#print np.concatenate((Xgrid, m), axis=1)
else:
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
fmu, fv = model._raw_predict(Xgrid, full_cov=False, **predict_kw)
lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata)
for d in which_data_ycols:
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
#if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
if not plot_raw and plot_training_data:
plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows,
visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples, Y_metadata=Y_metadata)
print Ysim.shape
print Xnew.shape
for yi in Ysim.T:
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
if samples_f: #NOTE not tested with fixed_inputs
Fsim = model.posterior_samples_f(Xgrid, samples_f)
Fsim = model.posterior_samples_f(Xgrid, samples)
if apply_link:
Fsim = model.likelihood.gp_link.transf(Fsim)
for fi in Fsim.T:
plots['posterior_samples_f'] = ax.plot(Xnew, fi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
plots['posterior_samples'] = ax.plot(Xnew, fi[:,None], '#3300FF', linewidth=0.25)
#ax.plot(Xnew, fi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
if samples_y: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples_y, Y_metadata=Y_metadata)
for yi in Ysim.T:
plots['posterior_samples_y'] = ax.scatter(Xnew, yi[:,None], s=5, c=Tango.colorsHex['darkBlue'], marker='o', alpha=0.5)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
@ -206,8 +277,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=plt.cm.jet)
#if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
if not plot_raw and plot_training_data:
plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0])
@ -272,3 +345,82 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_
return f_inputs
else:
return X
def errorbars_trainset(model, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
fignum=None, ax=None,
linecol='red', data_symbol='kx',
predict_kw=None, plot_training_data=True, **kwargs):
"""
Plot the posterior error bars corresponding to the training data
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
Can plot only part of the data
using which_data_rows and which_data_ycols.
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
"""
#deal with optional arguments
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(model.output_dim)
if ax is None:
fig = plt.figure(num=fignum)
ax = fig.add_subplot(111)
X = model.X
Y = model.Y
if predict_kw is None:
predict_kw = {}
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
plots = {}
#one dimensional plotting
if len(free_dims) == 1:
m, v = model.predict(X, full_cov=False, Y_metadata=model.Y_metadata, **predict_kw)
fmu, fv = model._raw_predict(X, full_cov=False, **predict_kw)
lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=model.Y_metadata)
for d in which_data_ycols:
plots['gperrors'] = gperrors(X, m[:, d], lower[:, d], upper[:, d], edgecol=linecol, ax=ax, fignum=fignum, **kwargs )
if plot_training_data:
plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows,
visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
#set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(X[:,free_dims].min(), X[:,free_dims].max())
ax.set_ylim(ymin, ymax)
elif len(free_dims) == 2:
raise NotImplementedError("Not implemented yet")
else:
raise NotImplementedError("Cannot define a frame with more than two input dimensions")
return plots

View file

@ -4,7 +4,7 @@
import numpy as np
try:
import pylab as pb
from matplotlib import pyplot as pb
except:
pass

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from matplotlib import pyplot as pb
def plot(model, ax=None, fignum=None, Z_height=None, **kwargs):

View file

@ -1,4 +1,4 @@
import pylab as pb, numpy as np
from matplotlib import pyplot as pb
import numpy as np
def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)):
"""

View file

@ -0,0 +1,109 @@
'''
Created on 4 Sep 2015
@author: maxz
'''
import unittest
import numpy as np
import GPy
class BGPLVMTest(unittest.TestCase):
def setUp(self):
np.random.seed(12345)
X, W = np.random.normal(0,1,(100,6)), np.random.normal(0,1,(6,13))
Y = X.dot(W) + np.random.normal(0, .1, (X.shape[0], W.shape[1]))
self.inan = np.random.binomial(1, .1, Y.shape).astype(bool)
self.X, self.W, self.Y = X,W,Y
self.Q = 3
self.m_full = GPy.models.BayesianGPLVM(Y, self.Q)
def test_lik_comparisons_m1_s0(self):
# Test if the different implementations give the exact same likelihood as the full model.
# All of the following settings should give the same likelihood and gradients as the full model:
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=False)
m[:] = self.m_full[:]
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
assert(m.checkgrad())
def test_predict_missing_data(self):
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
m[:] = self.m_full[:]
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
self.assertRaises(NotImplementedError, m.predict, m.X, full_cov=True)
mu1, var1 = m.predict(m.X, full_cov=False)
mu2, var2 = self.m_full.predict(self.m_full.X, full_cov=False)
np.testing.assert_allclose(mu1, mu2)
np.testing.assert_allclose(var1, var2)
mu1, var1 = m.predict(m.X.mean, full_cov=True)
mu2, var2 = self.m_full.predict(self.m_full.X.mean, full_cov=True)
np.testing.assert_allclose(mu1, mu2)
np.testing.assert_allclose(var1[:,:,0], var2)
mu1, var1 = m.predict(m.X.mean, full_cov=False)
mu2, var2 = self.m_full.predict(self.m_full.X.mean, full_cov=False)
np.testing.assert_allclose(mu1, mu2)
np.testing.assert_allclose(var1[:,[0]], var2)
def test_lik_comparisons_m0_s0(self):
# Test if the different implementations give the exact same likelihood as the full model.
# All of the following settings should give the same likelihood and gradients as the full model:
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=False)
m[:] = self.m_full[:]
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
assert(m.checkgrad())
def test_lik_comparisons_m1_s1(self):
# Test if the different implementations give the exact same likelihood as the full model.
# All of the following settings should give the same likelihood and gradients as the full model:
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
m[:] = self.m_full[:]
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
assert(m.checkgrad())
def test_lik_comparisons_m0_s1(self):
# Test if the different implementations give the exact same likelihood as the full model.
# All of the following settings should give the same likelihood and gradients as the full model:
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=True, batchsize=self.Y.shape[1])
m[:] = self.m_full[:]
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
assert(m.checkgrad())
def test_gradients_missingdata(self):
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=False, batchsize=self.Y.shape[1])
assert(m.checkgrad())
def test_gradients_missingdata_stochastics(self):
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=1)
assert(m.checkgrad())
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=4)
assert(m.checkgrad())
def test_gradients_stochastics(self):
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=True, batchsize=1)
assert(m.checkgrad())
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=True, batchsize=4)
assert(m.checkgrad())
def test_predict(self):
# Test if the different implementations give the exact same likelihood as the full model.
# All of the following settings should give the same likelihood and gradients as the full model:
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
m[:] = self.m_full[:]
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
assert(m.checkgrad())
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
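The tests above rely on flat parameter-array assignment (m[:] = self.m_full[:]) to make two differently implemented models share exactly the same hyperparameters. A minimal standalone sketch of that pattern, using GPRegression purely for illustration (not part of this diff):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05

m_a = GPy.models.GPRegression(X, Y)
m_b = GPy.models.GPRegression(X, Y)
m_a.randomize()

# m[:] reads/writes the model's flat parameter array, so this copies every
# (hyper)parameter from m_a into m_b in one assignment.
m_b[:] = m_a[:]

# With identical data and parameters the two models should agree.
np.testing.assert_almost_equal(m_a.log_likelihood(), m_b.log_likelihood())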

View file

@ -0,0 +1,37 @@
'''
Created on 4 Sep 2015
@author: maxz
'''
import unittest
from GPy.util.caching import Cacher
from pickle import PickleError
class Test(unittest.TestCase):
def setUp(self):
def op(x):
return x
self.cache = Cacher(op, 1)
def test_pickling(self):
self.assertRaises(PickleError, self.cache.__getstate__)
self.assertRaises(PickleError, self.cache.__setstate__)
def test_copy(self):
tmp = self.cache.__deepcopy__()
assert(tmp.operation is self.cache.operation)
self.assertEqual(tmp.limit, self.cache.limit)
def test_reset(self):
self.cache.reset()
self.assertDictEqual(self.cache.cached_input_ids, {}, )
self.assertDictEqual(self.cache.cached_outputs, {}, )
self.assertDictEqual(self.cache.inputs_changed, {}, )
def test_name(self):
assert(self.cache.__name__ == self.cache.operation.__name__)
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
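For context, a minimal usage sketch of the Cacher wrapper exercised above. It assumes only what the tests themselves suggest: a Cacher instance is callable, forwards to the wrapped operation, and can be emptied with reset():

import numpy as np
from GPy.util.caching import Cacher

def op(x):
    return x * 2

cached_op = Cacher(op, 1)         # keep at most one cached result
a = np.arange(3.)
r1 = cached_op(a)                 # computed
r2 = cached_op(a)                 # potentially served from the cache while `a` is unchanged
np.testing.assert_array_equal(r1, r2)
cached_op.reset()                 # empties the cache, as test_reset checks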

View file

@ -2,11 +2,21 @@ import numpy as np
import scipy as sp
from GPy.util import choleskies
import GPy
from ..util.config import config
import unittest
try:
from ..util import linalg_cython
from ..util import choleskies_cython
config.set('cython', 'working', 'True')
except ImportError:
config.set('cython', 'working', 'False')
"""
These tests make sure that the opure python and cython codes work the same
These tests make sure that the pure python and cython codes work the same
"""
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
class CythonTestChols(np.testing.TestCase):
def setUp(self):
self.flat = np.random.randn(45,5)
@ -20,6 +30,7 @@ class CythonTestChols(np.testing.TestCase):
A2 = choleskies._triang_to_flat_cython(self.triang)
np.testing.assert_allclose(A1, A2)
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
class test_stationary(np.testing.TestCase):
def setUp(self):
self.k = GPy.kern.RBF(10)
@ -49,17 +60,16 @@ class test_stationary(np.testing.TestCase):
g2 = self.k._lengthscale_grads_cython(self.dKxz, self.X, self.Z)
np.testing.assert_allclose(g1, g2)
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
class test_choleskies_backprop(np.testing.TestCase):
def setUp(self):
self.dL, self.L = np.random.randn(2, 100, 100)
a = np.random.randn(10,12)
A = a.dot(a.T)
self.L = GPy.util.linalg.jitchol(A)
self.dL = np.random.randn(10,10)
def test(self):
r1 = GPy.util.choleskies._backprop_gradient_pure(self.dL, self.L)
r2 = GPy.util.choleskies.choleskies_cython.backprop_gradient(self.dL, self.L)
r1 = choleskies._backprop_gradient_pure(self.dL, self.L)
r2 = choleskies_cython.backprop_gradient(self.dL, self.L)
r3 = choleskies_cython.backprop_gradient_par_c(self.dL, self.L)
np.testing.assert_allclose(r1, r2)
np.testing.assert_allclose(r1, r3)
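The flat-versus-triangular storage these tests compare can be illustrated with a round trip; a short sketch, assuming flat_to_triang/triang_to_flat are the pure-Python public wrappers around the Cython routines tested above:

import numpy as np
from GPy.util import choleskies

flat = np.random.randn(45, 5)               # 45 = 9*10/2 entries per 9x9 lower triangle, 5 matrices
triang = choleskies.flat_to_triang(flat)    # stack of 5 lower-triangular 9x9 matrices
flat2 = choleskies.triang_to_flat(triang)
np.testing.assert_allclose(flat, flat2)     # the round trip is lossless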

99
GPy/testing/gp_tests.py Normal file
View file

@ -0,0 +1,99 @@
'''
Created on 4 Sep 2015
@author: maxz
'''
import unittest
import numpy as np, GPy
from GPy.core.parameterization.variational import NormalPosterior
class Test(unittest.TestCase):
def setUp(self):
np.random.seed(12345)
self.N = 20
self.N_new = 50
self.D = 1
self.X = np.random.uniform(-3., 3., (self.N, 1))
self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
def test_setxy_bgplvm(self):
k = GPy.kern.RBF(1)
m = GPy.models.BayesianGPLVM(self.Y, 2, kernel=k)
mu, var = m.predict(m.X)
X = m.X.copy()
Xnew = NormalPosterior(m.X.mean[:10].copy(), m.X.variance[:10].copy())
m.set_XY(Xnew, m.Y[:10])
assert(m.checkgrad())
m.set_XY(X, self.Y)
mu2, var2 = m.predict(m.X)
np.testing.assert_allclose(mu, mu2)
np.testing.assert_allclose(var, var2)
def test_setxy_gplvm(self):
k = GPy.kern.RBF(1)
m = GPy.models.GPLVM(self.Y, 2, kernel=k)
mu, var = m.predict(m.X)
X = m.X.copy()
Xnew = X[:10].copy()
m.set_XY(Xnew, m.Y[:10])
assert(m.checkgrad())
m.set_XY(X, self.Y)
mu2, var2 = m.predict(m.X)
np.testing.assert_allclose(mu, mu2)
np.testing.assert_allclose(var, var2)
def test_setxy_gp(self):
k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
mu, var = m.predict(m.X)
X = m.X.copy()
m.set_XY(m.X[:10], m.Y[:10])
assert(m.checkgrad())
m.set_XY(X, self.Y)
mu2, var2 = m.predict(m.X)
np.testing.assert_allclose(mu, mu2)
np.testing.assert_allclose(var, var2)
def test_mean_function(self):
from GPy.core.parameterization.param import Param
from GPy.core.mapping import Mapping
class Parabola(Mapping):
def __init__(self, variance, degree=2, name='parabola'):
super(Parabola, self).__init__(1, 1, name)
self.variance = Param('variance', np.ones(degree+1) * variance)
self.degree = degree
self.link_parameter(self.variance)
def f(self, X):
p = self.variance[0] * np.ones(X.shape)
for i in range(1, self.degree+1):
p += self.variance[i] * X**(i)
return p
def gradients_X(self, dL_dF, X):
grad = np.zeros(X.shape)
for i in range(1, self.degree+1):
grad += (i) * self.variance[i] * X**(i-1)
return grad
def update_gradients(self, dL_dF, X):
for i in range(self.degree+1):
self.variance.gradient[i] = (dL_dF * X**(i)).sum(0)
X = np.linspace(-2, 2, 100)[:, None]
k = GPy.kern.RBF(1)
k.randomize()
p = Parabola(.3)
p.randomize()
Y = p.f(X) + np.random.multivariate_normal(np.zeros(X.shape[0]), k.K(X)+np.eye(X.shape[0])*1e-8)[:,None] + np.random.normal(0, .1, (X.shape[0], 1))
m = GPy.models.GPRegression(X, Y, mean_function=p)
m.randomize()
assert(m.checkgrad())
_ = m.predict(m.X)
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()
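Besides a hand-rolled Mapping like the Parabola above, one of GPy's built-in mappings can presumably be passed as the mean function in the same way. A brief illustrative sketch using GPy.mappings.Linear (the data and settings here are arbitrary, not part of the test):

import numpy as np
import GPy

X = np.linspace(-2, 2, 100)[:, None]
Y = 0.5 * X + np.sin(X) + np.random.randn(100, 1) * 0.1

mf = GPy.mappings.Linear(input_dim=1, output_dim=1)   # parametric linear mean
m = GPy.models.GPRegression(X, Y, mean_function=mf)
m.optimize()
print(m)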

View file

@ -8,11 +8,12 @@ The test cases for various inference algorithms
import unittest, itertools
import numpy as np
import GPy
#np.seterr(invalid='raise')
class InferenceXTestCase(unittest.TestCase):
def genData(self):
np.random.seed(1)
D1,D2,N = 12,12,50
x = np.linspace(0, 4 * np.pi, N)[:, None]

View file

@ -6,9 +6,16 @@ import numpy as np
import GPy
import sys
from GPy.core.parameterization.param import Param
from ..util.config import config
verbose = 0
try:
from ..util import linalg_cython
config.set('cython', 'working', 'True')
except ImportError:
config.set('cython', 'working', 'False')
class Kern_check_model(GPy.core.Model):
"""
@ -245,6 +252,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
continuous_kerns = ['RBF', 'Linear']
self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
def test_MLP(self):
k = GPy.kern.MLP(self.D,ARD=True)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Matern32(self):
k = GPy.kern.Matern32(self.D)
k.randomize()
@ -313,6 +325,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_standard_periodic(self):
k = GPy.kern.StdPeriodic(self.D, self.D-1)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
class KernelTestsMiscellaneous(unittest.TestCase):
def setUp(self):
N, D = 100, 10
@ -366,6 +383,7 @@ class KernelTestsNonContinuous(unittest.TestCase):
X2 = self.X2[self.X2[:,-1]!=2]
self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
class Coregionalize_cython_test(unittest.TestCase):
"""
Make sure that the coregionalize kernel work with and without cython enabled
@ -432,6 +450,104 @@ class KernelTestsProductWithZeroValues(unittest.TestCase):
self.assertFalse(np.any(np.isnan(target)),
"Gradient resulted in NaN")
class Kernel_Psi_statistics_GradientTests(unittest.TestCase):
def setUp(self):
from GPy.core.parameterization.variational import NormalPosterior
N,M,Q = 100,20,3
X = np.random.randn(N,Q)
X_var = np.random.rand(N,Q)+0.01
self.Z = np.random.randn(M,Q)
self.qX = NormalPosterior(X, X_var)
self.w1 = np.random.randn(N)
self.w2 = np.random.randn(N,M)
self.w3 = np.random.randn(M,M)
self.w3 = self.w3+self.w3.T
self.w3n = np.random.randn(N,M,M)
self.w3n = self.w3n+np.swapaxes(self.w3n, 1,2)
def test_kernels(self):
from GPy.kern import RBF,Linear,MLP
Q = self.Z.shape[1]
kernels = [RBF(Q,ARD=True), Linear(Q,ARD=True)]
for k in kernels:
k.randomize()
self._test_kernel_param(k)
self._test_Z(k)
self._test_qX(k)
self._test_kernel_param(k, psi2n=True)
self._test_Z(k, psi2n=True)
self._test_qX(k, psi2n=True)
def _test_kernel_param(self, kernel, psi2n=False):
def f(p):
kernel.param_array[:] = p
psi0 = kernel.psi0(self.Z, self.qX)
psi1 = kernel.psi1(self.Z, self.qX)
if not psi2n:
psi2 = kernel.psi2(self.Z, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
else:
psi2 = kernel.psi2n(self.Z, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
def df(p):
kernel.param_array[:] = p
kernel.update_gradients_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, self.Z, self.qX)
return kernel.gradient.copy()
from GPy.models import GradientChecker
m = GradientChecker(f, df, kernel.param_array.copy())
self.assertTrue(m.checkgrad())
def _test_Z(self, kernel, psi2n=False):
def f(p):
psi0 = kernel.psi0(p, self.qX)
psi1 = kernel.psi1(p, self.qX)
if not psi2n:
psi2 = kernel.psi2(p, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
else:
psi2 = kernel.psi2n(p, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
def df(p):
return kernel.gradients_Z_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, p, self.qX)
from GPy.models import GradientChecker
m = GradientChecker(f, df, self.Z.copy())
self.assertTrue(m.checkgrad())
def _test_qX(self, kernel, psi2n=False):
def f(p):
self.qX.param_array[:] = p
self.qX._trigger_params_changed()
psi0 = kernel.psi0(self.Z, self.qX)
psi1 = kernel.psi1(self.Z, self.qX)
if not psi2n:
psi2 = kernel.psi2(self.Z, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
else:
psi2 = kernel.psi2n(self.Z, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
def df(p):
self.qX.param_array[:] = p
self.qX._trigger_params_changed()
grad = kernel.gradients_qX_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, self.Z, self.qX)
self.qX.set_gradients(grad)
return self.qX.gradient.copy()
from GPy.models import GradientChecker
m = GradientChecker(f, df, self.qX.param_array.copy())
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print("Running unit tests, please be (very) patient...")

View file

@ -7,10 +7,8 @@ from GPy.models import GradientChecker
import functools
import inspect
from GPy.likelihoods import link_functions
from GPy.core.parameterization import Param
from functools import partial
#np.random.seed(300)
#np.random.seed(4)
fixed_seed = 7
#np.seterr(divide='raise')
def dparam_partial(inst_func, *args):
@ -105,6 +103,7 @@ class TestNoiseModels(object):
Generic model checker
"""
def setUp(self):
np.random.seed(fixed_seed)
self.N = 15
self.D = 3
self.X = np.random.rand(self.N, self.D)*10
@ -218,7 +217,8 @@ class TestNoiseModels(object):
"constraints": [(".*variance", self.constrain_positive)]
},
"laplace": True,
"ep": False # FIXME: Should be True when we have it working again
"ep": False, # FIXME: Should be True when we have it working again
"variational_expectations": True,
},
"Gaussian_log": {
"model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
@ -227,7 +227,8 @@ class TestNoiseModels(object):
"vals": [self.var],
"constraints": [(".*variance", self.constrain_positive)]
},
"laplace": True
"laplace": True,
"variational_expectations": True
},
#"Gaussian_probit": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
@ -252,7 +253,8 @@ class TestNoiseModels(object):
"link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
"ep": False # FIXME: Should be True when we have it working again
"ep": False, # FIXME: Should be True when we have it working again
"variational_expectations": True
},
"Exponential_default": {
"model": GPy.likelihoods.Exponential(),
@ -347,6 +349,10 @@ class TestNoiseModels(object):
ep = attributes["ep"]
else:
ep = False
if "variational_expectations" in attributes:
var_exp = attributes["variational_expectations"]
else:
var_exp = False
#if len(param_vals) > 1:
#raise NotImplementedError("Cannot support multiple params in likelihood yet!")
@ -377,6 +383,11 @@ class TestNoiseModels(object):
if ep:
#ep likelihood gradcheck
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
if var_exp:
#Need to specify mu and var!
yield self.t_varexp, model, Y, Y_metadata
yield self.t_dexp_dmu, model, Y, Y_metadata
yield self.t_dexp_dvar, model, Y, Y_metadata
self.tearDown()
@ -603,6 +614,87 @@ class TestNoiseModels(object):
print(m)
assert m.checkgrad(verbose=1, step=step)
################
# variational expectations #
################
@with_setup(setUp, tearDown)
def t_varexp(self, model, Y, Y_metadata):
#Test that the analytic implementation (if it exists) matches the generic gauss
#hermite implementation
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = model.variational_expectations(Y=Y, m=mu, v=var, gh_points=None, Y_metadata=Y_metadata)[0]
#Implementation of gauss hermite integration
shape = mu.shape
gh_x, gh_w= np.polynomial.hermite.hermgauss(50)
m,v,Y = mu.flatten(), var.flatten(), Y.flatten()
#make a grid of points
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
#evaluate the likelihood for the grid. The first axis indexes the data (and mu, var), the second indexes the grid points.
# broadcasting needs to be handled carefully.
logp = model.logpdf(X, Y[:,None], Y_metadata=Y_metadata)
#average over the grid to get the expectation under the Gaussian
#the division by sqrt(pi) comes from the fact that each Gauss-Hermite quadrature term must be scaled by 1/sqrt(pi)
expectation_gh = np.dot(logp, gh_w)/np.sqrt(np.pi)
expectation_gh = expectation_gh.reshape(*shape)
np.testing.assert_almost_equal(expectation, expectation_gh, decimal=5)
@with_setup(setUp, tearDown)
def t_dexp_dmu(self, model, Y, Y_metadata):
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = functools.partial(model.variational_expectations, Y=Y, v=var, gh_points=None, Y_metadata=Y_metadata)
#Helpers picking out the expectation (index 0) and its derivative w.r.t. the mean (index 1) from the returned tuple
def F(mu):
return expectation(m=mu)[0]
def dmu(mu):
return expectation(m=mu)[1]
grad = GradientChecker(F, dmu, mu.copy(), 'm')
grad.randomize()
print(grad)
print(model)
assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown)
def t_dexp_dvar(self, model, Y, Y_metadata):
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = functools.partial(model.variational_expectations, Y=Y, m=mu, gh_points=None, Y_metadata=Y_metadata)
#Helpers picking out the expectation (index 0) and its derivative w.r.t. the variance (index 2) from the returned tuple
def F(var):
return expectation(v=var)[0]
def dvar(var):
return expectation(v=var)[2]
grad = GradientChecker(F, dvar, var.copy(), 'v')
self.constrain_positive('v', grad)
#grad.randomize()
print(grad)
print(model)
assert grad.checkgrad(verbose=1)
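The factor of 1/sqrt(pi) in t_varexp above comes from the change of variables f = m + sqrt(2*v)*x in the Gauss-Hermite rule. A standalone numerical check on a toy integrand whose Gaussian expectation is known in closed form (E[f^2] under N(m, v) equals m^2 + v):

import numpy as np

m, v = 0.3, 0.7
gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
f = m + np.sqrt(2. * v) * gh_x
approx = np.dot(f ** 2, gh_w) / np.sqrt(np.pi)
np.testing.assert_almost_equal(approx, m ** 2 + v)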
class LaplaceTests(unittest.TestCase):
"""
@ -610,6 +702,7 @@ class LaplaceTests(unittest.TestCase):
"""
def setUp(self):
np.random.seed(fixed_seed)
self.N = 15
self.D = 1
self.X = np.random.rand(self.N, self.D)*10
@ -705,7 +798,7 @@ class LaplaceTests(unittest.TestCase):
post_mean_approx, post_var_approx, = m2.predict(X)
if debug:
import pylab as pb
from matplotlib import pyplot as pb
pb.figure(5)
pb.title('posterior means')
pb.scatter(X, post_mean, c='g')

View file

@ -1,7 +1,6 @@
import numpy as np
import scipy as sp
from GPy.util.linalg import jitchol
import GPy
from ..util.linalg import jitchol,trace_dot, ijk_jlk_to_il, ijk_ljk_to_ilk
class LinalgTests(np.testing.TestCase):
def setUp(self):
@ -37,18 +36,19 @@ class LinalgTests(np.testing.TestCase):
except sp.linalg.LinAlgError:
return True
def test_einsum_ijk_jlk_to_il(self):
A = np.random.randn(50, 150, 5)
B = np.random.randn(150, 100, 5)
pure = np.einsum('ijk,jlk->il', A, B)
quick = GPy.util.linalg.ijk_jlk_to_il(A, B)
np.testing.assert_allclose(pure, quick)
def test_trace_dot(self):
N = 5
A = np.random.rand(N,N)
B = np.random.rand(N,N)
trace = np.trace(A.dot(B))
test_trace = trace_dot(A,B)
np.testing.assert_allclose(trace,test_trace,atol=1e-13)
def test_einsum_ij_jlk_to_ilk(self):
A = np.random.randn(15, 150, 5)
B = np.random.randn(150, 50, 5)
pure = np.einsum('ijk,jlk->il', A, B)
quick = GPy.util.linalg.ijk_jlk_to_il(A,B)
quick = ijk_jlk_to_il(A,B)
np.testing.assert_allclose(pure, quick)
def test_einsum_ijk_ljk_to_ilk(self):
@ -56,5 +56,5 @@ class LinalgTests(np.testing.TestCase):
B = np.random.randn(150, 20, 5)
#B = A.copy()
pure = np.einsum('ijk,ljk->ilk', A, B)
quick = GPy.util.linalg.ijk_ljk_to_ilk(A,B)
quick = ijk_ljk_to_ilk(A,B)
np.testing.assert_allclose(pure, quick)

View file

@ -1,5 +1,5 @@
import numpy as np
import scipy as sp
import scipy
from scipy.special import cbrt
from GPy.models import GradientChecker
_lim_val = np.finfo(np.float64).max
@ -79,8 +79,7 @@ class LinkFunctionTests(np.testing.TestCase):
assert np.isinf(np.exp(np.log(self.f_upper_lim)))
#Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
self.assertTrue(np.isfinite(link.transf(self.f_upper_lim)))
self.check_overflow(link, lim_of_inf)
#Check that it would otherwise fail
@ -93,18 +92,18 @@ class LinkFunctionTests(np.testing.TestCase):
link = Log_ex_1()
lim_of_inf = _lim_val_exp
np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
assert np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim))))
np.testing.assert_almost_equal(scipy.special.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
assert np.isinf(scipy.special.log1p(np.exp(np.log(self.f_upper_lim))))
#Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5)
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), scipy.special.log1p(_lim_val), significant=5)
self.check_overflow(link, lim_of_inf)
#Check that it would otherwise fail
beyond_lim_of_inf = lim_of_inf + 10.0
old_err_state = np.seterr(over='ignore')
self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf))))
self.assertTrue(np.isinf(scipy.special.log1p(np.exp(beyond_lim_of_inf))))
np.seterr(**old_err_state)

View file

@ -1,6 +1,8 @@
from __future__ import print_function
import numpy as np
import scipy as sp
import GPy
import warnings
class MiscTests(np.testing.TestCase):
"""
@ -11,8 +13,15 @@ class MiscTests(np.testing.TestCase):
self._lim_val_exp = np.log(self._lim_val)
def test_safe_exp_upper(self):
assert np.exp(self._lim_val_exp + 1) == np.inf
assert GPy.util.misc.safe_exp(self._lim_val_exp + 1) < np.inf
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always') # always print
assert np.isfinite(np.exp(self._lim_val_exp))
assert np.isinf(np.exp(self._lim_val_exp + 1))
assert np.isfinite(GPy.util.misc.safe_exp(self._lim_val_exp + 1))
print(w)
print(len(w))
assert len(w)<=1 # should have one overflow warning
def test_safe_exp_lower(self):
assert GPy.util.misc.safe_exp(1e-10) < np.inf
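The same behaviour can be seen without the warnings machinery used above; a small sketch of what these tests assert about safe_exp:

import numpy as np
import GPy

big = np.log(np.finfo(np.float64).max) + 1.        # just beyond the overflow limit
old = np.seterr(over='ignore')
print(np.isinf(np.exp(big)))                       # True: plain exp overflows to inf
print(np.isfinite(GPy.util.misc.safe_exp(big)))    # True: safe_exp stays finite
np.seterr(**old)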

View file

@ -15,6 +15,13 @@ class MiscTests(unittest.TestCase):
self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
def test_setXY(self):
m = GPy.models.GPRegression(self.X, self.Y)
m.set_XY(np.vstack([self.X, np.random.rand(1,self.X.shape[1])]), np.vstack([self.Y, np.random.rand(1,self.Y.shape[1])]))
m._trigger_params_changed()
self.assertTrue(m.checkgrad())
m.predict(m.X)
def test_raw_predict(self):
k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
@ -36,12 +43,78 @@ class MiscTests(unittest.TestCase):
np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
np.testing.assert_almost_equal(mu_hat, mu)
def test_normalizer(self):
k = GPy.kern.RBF(1)
Y = self.Y
mu, std = Y.mean(0), Y.std(0)
m = GPy.models.GPRegression(self.X, Y, kernel=k, normalizer=True)
m.optimize()
assert(m.checkgrad())
k = GPy.kern.RBF(1)
m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
m2[:] = m[:]
mu1, var1 = m.predict(m.X, full_cov=True)
mu2, var2 = m2.predict(m2.X, full_cov=True)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
mu1, var1 = m.predict(m.X, full_cov=False)
mu2, var2 = m2.predict(m2.X, full_cov=False)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
q50n = m.predict_quantiles(m.X, (50,))
q50 = m2.predict_quantiles(m2.X, (50,))
np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
def check_jacobian(self):
try:
import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt
from GPy.models import GradientChecker, GPRegression
except:
raise self.skipTest("autograd not available to check gradients")
def k(X, X2, alpha=1., lengthscale=None):
if lengthscale is None:
lengthscale = np.ones(X.shape[1])
exp = 0.
for q in range(X.shape[1]):
exp += ((X[:, [q]] - X2[:, [q]].T)/lengthscale[q])**2
#exp = np.sqrt(exp)
return alpha * np.exp(-.5*exp)
dk = ag.elementwise_grad(lambda x, x2: k(x, x2, alpha=ke.variance.values, lengthscale=ke.lengthscale.values))
dkdk = ag.elementwise_grad(dk, argnum=1)
ke = GPy.kern.RBF(1, ARD=True)
#ke.randomize()
ke.variance = .2
ke.lengthscale[:] = .5
ke.randomize()
X = np.linspace(-1, 1, 1000)[:,None]
X2 = np.array([[0.]]).T
np.testing.assert_allclose(ke.gradients_X([[1.]], X, X), dk(X, X))
np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X).sum(0), dkdk(X, X))
np.testing.assert_allclose(ke.gradients_X([[1.]], X, X2), dk(X, X2))
np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X2).sum(0), dkdk(X, X2))
m = GPRegression(self.X, self.Y)
def f(x):
m.X[:] = x
return m.log_likelihood()
def df(x):
m.X[:] = x
return m.kern.gradients_X(m.grad_dict['dL_dK'], X)
def ddf(x):
m.X[:] = x
return m.kern.gradients_XX(m.grad_dict['dL_dK'], X).sum(0)
gc = GradientChecker(f, df, self.X)
gc2 = GradientChecker(df, ddf, self.X)
assert(gc.checkgrad())
assert(gc2.checkgrad())
def test_sparse_raw_predict(self):
k = GPy.kern.RBF(1)
m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k)
m.randomize()
Z = m.Z[:]
X = self.X[:]
# Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
Kinv = m.posterior.woodbury_inv
@ -127,11 +200,24 @@ class MiscTests(unittest.TestCase):
m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
mul, varl = m.predict(m.X)
k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
m2 = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
assert(m.checkgrad())
m2.kern.rbf.lengthscale[:] = 1e6
m2.X[:] = m.X.param_array
m2.likelihood[:] = m.likelihood[:]
m2.kern.white[:] = m.kern.white[:]
mu, var = m.predict(m.X)
np.testing.assert_allclose(mul, mu)
np.testing.assert_allclose(varl, var)
q50 = m.predict_quantiles(m.X, (50,))
np.testing.assert_allclose(mul, q50[0])
def test_likelihood_replicate_kern(self):
m = GPy.models.GPRegression(self.X, self.Y)
@ -410,8 +496,8 @@ class GradientTests(np.testing.TestCase):
self.check_model(rbf, model_type='SparseGPRegression', dimension=2)
def test_SparseGPRegression_rbf_linear_white_kern_1D(self):
''' Testing the sparse GP regression with rbf kernel on 2d data '''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
''' Testing the sparse GP regression with rbf kernel on 1d data '''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1) + GPy.kern.White(1, 1e-5)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=1)
def test_SparseGPRegression_rbf_linear_white_kern_2D(self):
@ -419,14 +505,12 @@ class GradientTests(np.testing.TestCase):
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)
# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
@ -443,6 +527,16 @@ class GradientTests(np.testing.TestCase):
m = GPy.models.GPLVM(Y, input_dim, kernel=k)
self.assertTrue(m.checkgrad())
def test_BCGPLVM_rbf_bias_white_kern_2D(self):
""" Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.Bias(input_dim, 0.1) + GPy.kern.White(input_dim, 0.05)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.BCGPLVM(Y, input_dim, kernel=k)
self.assertTrue(m.checkgrad())
def test_GPLVM_rbf_linear_white_kern_2D(self):
""" Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2
@ -468,23 +562,8 @@ class GradientTests(np.testing.TestCase):
Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.RBF(1)
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z)
# distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
# likelihood = GPy.likelihoods.EP(Y, distribution)
# m = GPy.core.SparseGP(X, likelihood, kernel, Z)
# m.ensure_default_constraints()
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_generalized_FITC(self):
N = 20
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.RBF(1) + GPy.kern.White(1)
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
m = GPy.models.FITCClassification(X, Y, kernel=k)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_multioutput_regression_1D(self):
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
@ -494,12 +573,11 @@ class GradientTests(np.testing.TestCase):
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.RBF(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
import ipdb;ipdb.set_trace()
m.constrain_fixed('.*rbf_var', 1.)
m = GPy.models.GPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel=k1)
#import ipdb;ipdb.set_trace()
#m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_multioutput_sparse_regression_1D(self):
X1 = np.random.rand(500, 1) * 8
X2 = np.random.rand(300, 1) * 5
@ -509,8 +587,7 @@ class GradientTests(np.testing.TestCase):
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.RBF(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel=k1)
self.assertTrue(m.checkgrad())
def test_gp_heteroscedastic_regression(self):
@ -539,6 +616,7 @@ class GradientTests(np.testing.TestCase):
self.assertTrue(m.checkgrad())
def test_gp_kronecker_gaussian(self):
np.random.seed(0)
N1, N2 = 30, 20
X1 = np.random.randn(N1, 1)
X2 = np.random.randn(N2, 1)
@ -559,16 +637,16 @@ class GradientTests(np.testing.TestCase):
m.randomize()
mm[:] = m[:]
assert np.allclose(m.log_likelihood(), mm.log_likelihood())
assert np.allclose(m.gradient, mm.gradient)
self.assertTrue(np.allclose(m.log_likelihood(), mm.log_likelihood()))
self.assertTrue(np.allclose(m.gradient, mm.gradient))
X1test = np.random.randn(100, 1)
X2test = np.random.randn(100, 1)
mean1, var1 = m.predict(X1test, X2test)
yy, xx = np.meshgrid(X2test, X1test)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
mean2, var2 = mm.predict(Xgrid)
assert np.allclose(mean1, mean2)
assert np.allclose(var1, var2)
self.assertTrue( np.allclose(mean1, mean2) )
self.assertTrue( np.allclose(var1, var2) )
def test_gp_VGPC(self):
num_obs = 25
@ -576,7 +654,8 @@ class GradientTests(np.testing.TestCase):
X = X[:, None]
Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
lik = GPy.likelihoods.Gaussian()
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
self.assertTrue(m.checkgrad())

View file

@ -248,10 +248,16 @@ class ParameterizedTest(unittest.TestCase):
m.randomize()
self.assertEqual(m.p1, val)
def test_checkgrad(self):
assert(self.testmodel.kern.checkgrad())
assert(self.testmodel.kern.lengthscale.checkgrad())
assert(self.testmodel.likelihood.checkgrad())
def test_printing(self):
print(self.test1)
print(self.param)
print(self.test1[''])
print(self.testmodel.hierarchy_name(False))
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_add_parameter']

View file

@ -20,6 +20,8 @@ from GPy.examples.dimensionality_reduction import mrd_simulation
from GPy.core.parameterization.variational import NormalPosterior
from GPy.models.gp_regression import GPRegression
from functools import reduce
from GPy.util.caching import Cacher
from pickle import PicklingError
def toy_model():
X = np.linspace(0,1,50)[:, None]
@ -205,23 +207,6 @@ class Test(ListDictTestCase):
def _callback(self, what, which):
what.count += 1
@unittest.skip
def test_add_observer(self):
par = toy_model()
par.name = "original"
par.count = 0
par.add_observer(self, self._callback, 1)
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
self.assertNotIn(par.observers[0], pcopy.observers)
pcopy = par.copy()
pcopy.name = "copy"
self.assertTrue(par.checkgrad())
self.assertTrue(pcopy.checkgrad())
self.assertTrue(pcopy.kern.checkgrad())
import ipdb;ipdb.set_trace()
self.assertIn(par.observers[0], pcopy.observers)
self.assertEqual(par.count, 3)
self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_parameter_index_operations']

1
GPy/testing/run_coverage.sh Executable file
View file

@ -0,0 +1 @@
nosetests . --with-coverage --logging-level=INFO --cover-html --cover-html-dir=coverage --cover-package=GPy --cover-erase

View file

@ -0,0 +1,101 @@
# Written by Ilias Bilionis
"""
Test if hyperparameters in models are properly transformed.
"""
import unittest
import numpy as np
import scipy.stats as st
import GPy
class TestModel(GPy.core.Model):
"""
A simple GPy model with one parameter.
"""
def __init__(self):
GPy.core.Model.__init__(self, 'test_model')
theta = GPy.core.Param('theta', 1.)
self.link_parameter(theta)
def log_likelihood(self):
return 0.
class RVTransformationTestCase(unittest.TestCase):
def _test_trans(self, trans):
m = TestModel()
prior = GPy.priors.LogGaussian(.5, 0.1)
m.theta.set_prior(prior)
m.theta.unconstrain()
m.theta.constrain(trans)
# The PDF of the transformed variable phi, as the model sees it:
p_phi = lambda phi : np.exp(-m._objective_grads(phi)[0])
# Compare it to the empirical PDF of prior samples mapped through the inverse transform,
theta_s = prior.rvs(100000)
phi_s = trans.finv(theta_s)
# estimated with a kernel density estimate.
kde = st.gaussian_kde(phi_s)
# The two PDFs are compared on this grid:
phi = np.linspace(phi_s.min(), phi_s.max(), 100)
# The transformed PDF of phi should be this:
pdf_phi = np.array([p_phi(p) for p in phi])
# UNCOMMENT TO SEE GRAPHICAL COMPARISON
#import matplotlib.pyplot as plt
#fig, ax = plt.subplots()
#ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Histogram')
#ax.plot(phi, kde(phi), '--', linewidth=2, label='Kernel Density Estimation')
#ax.plot(phi, pdf_phi, ':', linewidth=2, label='Transformed PDF')
#ax.set_xlabel(r'transformed $\theta$', fontsize=16)
#ax.set_ylabel('PDF', fontsize=16)
#plt.legend(loc='best')
#plt.show(block=True)
# END OF PLOT
# The KDE comparison is only approximate, so allow up to 10% relative error
self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1)
# Check the gradients at a few random points
for i in range(10):
m.theta = theta_s[i]
self.assertTrue(m.checkgrad(verbose=True))
def test_Logexp(self):
self._test_trans(GPy.constraints.Logexp())
self._test_trans(GPy.constraints.Exponent())
if __name__ == '__main__':
unittest.main()
quit()
# Exploratory plotting script, kept for visual comparison (the quit() above stops execution when run as a script).
import matplotlib.pyplot as plt
m = TestModel()
prior = GPy.priors.LogGaussian(0., .9)
m.theta.set_prior(prior)
# The following should return the PDF in terms of the transformed quantities
p_phi = lambda phi : np.exp(-m._objective_grads(phi)[0])
# Let's look at the transformation phi = log(exp(theta - 1))
trans = GPy.constraints.Exponent()
m.theta.constrain(trans)
# Plot the transformed probability density
phi = np.linspace(-8, 8, 100)
fig, ax = plt.subplots()
# Let's draw some samples of theta and transform them so that we see
# which one is right
theta_s = prior.rvs(10000)
# Transform it to the new variables
phi_s = trans.finv(theta_s)
# And draw their histogram
ax.hist(phi_s, normed=True, bins=100, alpha=0.25, label='Empirical')
# This is to be compared to the PDF of the model expressed in terms of these new
# variables
ax.plot(phi, [p_phi(p) for p in phi], label='Transformed PDF', linewidth=2)
ax.set_xlim(-3, 10)
ax.set_xlabel(r'transformed $\theta$', fontsize=16)
ax.set_ylabel('PDF', fontsize=16)
plt.legend(loc='best')
# Now let's test the gradients
m.checkgrad(verbose=True)
# And show the plot
plt.show(block=True)
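The identity RVTransformationTestCase verifies is the change-of-variables formula p_phi(phi) = p_theta(f(phi)) * |df/dphi|. For the Exponent transform (theta = exp(phi)) and a log-Gaussian prior, the transformed density is an ordinary Gaussian, which can be checked with scipy alone; a sketch independent of GPy's prior and constraint classes:

import numpy as np
import scipy.stats as st

mu, sigma = 0.5, 0.1
theta = st.lognorm(s=sigma, scale=np.exp(mu))      # log(theta) ~ N(mu, sigma^2)

phi = np.linspace(mu - 4 * sigma, mu + 4 * sigma, 200)
p_phi = theta.pdf(np.exp(phi)) * np.exp(phi)       # p_theta(exp(phi)) * |d theta / d phi|
np.testing.assert_allclose(p_phi, st.norm(mu, sigma).pdf(phi), rtol=1e-10)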

View file

@ -15,5 +15,5 @@ from . import caching
from . import diag
from . import initialization
from . import multioutput
from . import linalg_gpu
from . import parallel

Some files were not shown because too many files have changed in this diff.