merging last master

This commit is contained in:
beckdaniel 2015-09-17 14:43:00 +01:00
commit 1a02c65a61
133 changed files with 13282 additions and 9562 deletions

View file

@ -17,7 +17,7 @@ before_install:
- sudo ln -s /run/shm /dev/shm - sudo ln -s /run/shm /dev/shm
install: install:
- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.9 scipy=0.16 matplotlib nose sphinx pip nose
#- pip install . #- pip install .
- python setup.py build_ext --inplace - python setup.py build_ext --inplace
#--use-mirrors #--use-mirrors

View file

@ -1,7 +1 @@
James Hensman See contributors.
Nicolo Fusi
Ricardo Andrade
Nicolas Durrande
Alan Saul
Max Zwiessele
Neil D. Lawrence

View file

@ -21,16 +21,18 @@ from . import plotting
from .core import Model from .core import Model
from .core.parameterization import Param, Parameterized, ObsAr from .core.parameterization import Param, Parameterized, ObsAr
from .__version__ import __version__
#@nottest #@nottest
try: try:
#Get rid of nose dependency by only ignoring if you have nose installed #Get rid of nose dependency by only ignoring if you have nose installed
from nose.tools import nottest from nose.tools import nottest
@nottest @nottest
def tests(): def tests(verbose=10):
Tester(testing).test(verbose=10) Tester(testing).test(verbose=verbose)
except: except:
def tests(): def tests(verbose=10):
Tester(testing).test(verbose=10) Tester(testing).test(verbose=verbose)
def load(file_path): def load(file_path):
""" """

1
GPy/__version__.py Normal file
View file

@ -0,0 +1 @@
__version__ = "0.8.8"

View file

@ -7,6 +7,6 @@ from .parameterization.param import Param, ParamConcatenation
from .parameterization.observable_array import ObsAr from .parameterization.observable_array import ObsAr
from .gp import GP from .gp import GP
#from .svgp import SVGP from .svgp import SVGP
from .sparse_gp import SparseGP from .sparse_gp import SparseGP
from .mapping import * from .mapping import *

View file

@ -60,9 +60,11 @@ class GP(Model):
self.normalizer.scale_by(Y) self.normalizer.scale_by(Y)
self.Y_normalized = ObsAr(self.normalizer.normalize(Y)) self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
self.Y = Y self.Y = Y
else: elif isinstance(Y, np.ndarray):
self.Y = ObsAr(Y) self.Y = ObsAr(Y)
self.Y_normalized = self.Y self.Y_normalized = self.Y
else:
self.Y = Y
if Y.shape[0] != self.num_data: if Y.shape[0] != self.num_data:
#There can be cases where we want inputs than outputs, for example if we have multiple latent #There can be cases where we want inputs than outputs, for example if we have multiple latent
@ -104,8 +106,23 @@ class GP(Model):
self.link_parameter(self.likelihood) self.link_parameter(self.likelihood)
self.posterior = None self.posterior = None
# The predictive variable to be used to predict using the posterior object's
# woodbury_vector and woodbury_inv is defined as predictive_variable
# as long as the posterior has the right woodbury entries.
# It is the input variable used for the covariance between
# X_star and the posterior of the GP.
# This is usually just a link to self.X (full GP) or self.Z (sparse GP).
# Make sure to name this variable and the predict functions will "just work"
# In maths the predictive variable is:
# K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
# W_{pp} := \texttt{Woodbury inv}
# p := _predictive_variable
def set_XY(self, X=None, Y=None, trigger_update=True): @property
def _predictive_variable(self):
return self.X
def set_XY(self, X=None, Y=None):
""" """
Set the input / output data of the model Set the input / output data of the model
This is useful if we wish to change our existing data but maintain the same model This is useful if we wish to change our existing data but maintain the same model
@ -115,7 +132,7 @@ class GP(Model):
:param Y: output observations :param Y: output observations
:type Y: np.ndarray :type Y: np.ndarray
""" """
if trigger_update: self.update_model(False) self.update_model(False)
if Y is not None: if Y is not None:
if self.normalizer is not None: if self.normalizer is not None:
self.normalizer.scale_by(Y) self.normalizer.scale_by(Y)
@ -131,34 +148,33 @@ class GP(Model):
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!" assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
self.unlink_parameter(self.X) self.unlink_parameter(self.X)
self.X = X self.X = X
self.link_parameters(self.X) self.link_parameter(self.X)
else: else:
self.unlink_parameter(self.X) self.unlink_parameter(self.X)
from ..core import Param from ..core import Param
self.X = Param('latent mean',X) self.X = Param('latent mean',X)
self.link_parameters(self.X) self.link_parameter(self.X)
else: else:
self.X = ObsAr(X) self.X = ObsAr(X)
if trigger_update: self.update_model(True) self.update_model(True)
if trigger_update: self._trigger_params_changed()
def set_X(self,X, trigger_update=True): def set_X(self,X):
""" """
Set the input data of the model Set the input data of the model
:param X: input observations :param X: input observations
:type X: np.ndarray :type X: np.ndarray
""" """
self.set_XY(X=X, trigger_update=trigger_update) self.set_XY(X=X)
def set_Y(self,Y, trigger_update=True): def set_Y(self,Y):
""" """
Set the output data of the model Set the output data of the model
:param X: output observations :param X: output observations
:type X: np.ndarray :type X: np.ndarray
""" """
self.set_XY(Y=Y, trigger_update=trigger_update) self.set_XY(Y=Y)
def parameters_changed(self): def parameters_changed(self):
""" """
@ -181,7 +197,7 @@ class GP(Model):
""" """
return self._log_marginal_likelihood return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, full_cov=False, kern=None): def _raw_predict(self, Xnew, full_cov=False, kern=None):
""" """
For making predictions, does not account for normalization or likelihood For making predictions, does not account for normalization or likelihood
@ -197,24 +213,33 @@ class GP(Model):
if kern is None: if kern is None:
kern = self.kern kern = self.kern
Kx = kern.K(_Xnew, self.X).T Kx = kern.K(self._predictive_variable, Xnew)
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
mu = np.dot(Kx.T, self.posterior.woodbury_vector) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if len(mu.shape)==1:
mu = mu.reshape(-1,1)
if full_cov: if full_cov:
Kxx = kern.K(_Xnew) Kxx = kern.K(Xnew)
var = Kxx - np.dot(Kx.T, WiKx) if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
from ..util.linalg import mdot
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else: else:
Kxx = kern.Kdiag(_Xnew) Kxx = kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKx*Kx, 0) if self.posterior.woodbury_inv.ndim == 2:
var = var.reshape(-1, 1) var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
var[var<0.] = 0. elif self.posterior.woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]
#add the mean function in
if not self.mean_function is None:
mu += self.mean_function.f(_Xnew)
return mu, var return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None): def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@ -247,7 +272,7 @@ class GP(Model):
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata) mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
return mean, var return mean, var
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None): def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None):
""" """
Get the predictive quantiles around the prediction at X Get the predictive quantiles around the prediction at X
@ -255,10 +280,12 @@ class GP(Model):
:type X: np.ndarray (Xnew x self.input_dim) :type X: np.ndarray (Xnew x self.input_dim)
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
:type quantiles: tuple :type quantiles: tuple
:param kern: optional kernel to use for prediction
:type predict_kw: dict
:returns: list of quantiles for each X and predictive quantiles for interval combination :returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)] :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
""" """
m, v = self._raw_predict(X, full_cov=False) m, v = self._raw_predict(X, full_cov=False, kern=kern)
if self.normalizer is not None: if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata) return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
@ -292,6 +319,120 @@ class GP(Model):
return dmu_dX, dv_dX return dmu_dX, dv_dX
def predict_jacobian(self, Xnew, kern=None, full_cov=True):
"""
Compute the derivatives of the posterior of the GP.
Given a set of points at which to predict X* (size [N*,Q]), compute the
mean and variance of the derivative. Resulting arrays are sized:
dL_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
Note that this is the mean and variance of the derivative,
not the derivative of the mean and variance! (See predictive_gradients for that)
dv_dX* -- [N*, Q], (since all outputs have the same variance)
If there is missing data, it is not implemented for now, but
there will be one output variance per output dimension.
:param X: The points at which to get the predictive gradients.
:type X: np.ndarray (Xnew x self.input_dim)
:param kern: The kernel to compute the jacobian for.
:param boolean full_cov: whether to return the full covariance of the jacobian.
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
Note: We always return sum in input_dim gradients, as the off-diagonals
in the input_dim are not needed for further calculations.
This is a compromise for increase in speed. Mathematically the jacobian would
have another dimension in Q.
"""
if kern is None:
kern = self.kern
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
for i in range(self._predictive_variable.shape[0]):
dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])
if full_cov:
dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
else:
dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
def compute_cov_inner(wi):
if full_cov:
# full covariance gradients:
var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
else:
var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
return var_jac
if self.posterior.woodbury_inv.ndim == 3: # Missing data:
if full_cov:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[0],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac[:, :, d] = compute_cov_inner(self.posterior.woodbury_inv[:, :, d])
else:
var_jac = compute_cov_inner(self.posterior.woodbury_inv)
return mean_jac, var_jac
def predict_wishard_embedding(self, Xnew, kern=None, mean=True, covariance=True):
"""
Predict the wishard embedding G of the GP. This is the density of the
input of the GP defined by the probabilistic function mapping f.
G = J_mean.T*J_mean + output_dim*J_cov.
:param array-like Xnew: The points at which to evaluate the magnification.
:param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification.
Supplying only a part of the learning kernel gives insights into the density
of the specific kernel part of the input function. E.g. one can see how dense the
linear part of a kernel is compared to the non-linear part etc.
"""
if kern is None:
kern = self.kern
mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
Sigma = np.zeros(mumuT.shape)
if var_jac.ndim == 3:
Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = var_jac.sum(-1)
else:
Sigma[(slice(None), )+np.diag_indices(Xnew.shape[1], 2)] = self.output_dim*var_jac
G = 0.
if mean:
G += mumuT
if covariance:
G += Sigma
return G
def predict_magnification(self, Xnew, kern=None, mean=True, covariance=True):
"""
Predict the magnification factor as
sqrt(det(G))
for each point N in Xnew
"""
G = self.predict_wishard_embedding(Xnew, kern, mean, covariance)
from ..util.linalg import jitchol
mag = np.empty(Xnew.shape[0])
for n in range(Xnew.shape[0]):
try:
mag[n] = np.sqrt(np.exp(2*np.sum(np.log(np.diag(jitchol(G[n, :, :]))))))
except:
mag[n] = np.sqrt(np.linalg.det(G[n, :, :]))
return mag
def posterior_samples_f(self,X,size=10, full_cov=True): def posterior_samples_f(self,X,size=10, full_cov=True):
""" """
Samples the posterior GP at the points X. Samples the posterior GP at the points X.
@ -395,8 +536,8 @@ class GP(Model):
def plot(self, plot_limits=None, which_data_rows='all', def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None, levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False, plot_raw=False, linecol=None,fillcol=None, Y_metadata=None,
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None): data_symbol='kx', predict_kw=None, plot_training_data=True, samples_y=0, apply_link=False):
""" """
Plot the posterior of the GP. Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations. - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -419,7 +560,7 @@ class GP(Model):
:param levels: number of levels to plot in a contour plot. :param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:type levels: int :type levels: int
:param samples: the number of a posteriori samples to plot :param samples: the number of a posteriori samples to plot, p(f*|y)
:type samples: int :type samples: int
:param fignum: figure to plot on. :param fignum: figure to plot on.
:type fignum: figure number :type fignum: figure number
@ -433,6 +574,12 @@ class GP(Model):
:type Y_metadata: dict :type Y_metadata: dict
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx') :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib. :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
:param samples_y: the number of a posteriori samples to plot, p(y*|y)
:type samples_y: int
:param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*, when plotting posterior samples f
:type apply_link: boolean
""" """
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots from ..plotting.matplot_dep import models_plots
@ -445,7 +592,103 @@ class GP(Model):
which_data_ycols, fixed_inputs, which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution, levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata, plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, predict_kw=predict_kw, **kw) data_symbol=data_symbol, predict_kw=predict_kw,
plot_training_data=plot_training_data, samples_y=samples_y, apply_link=apply_link, **kw)
def plot_data(self, which_data_rows='all',
which_data_ycols='all', visible_dims=None,
fignum=None, ax=None, data_symbol='kx'):
"""
Plot the training data
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
Can plot only part of the data
using which_data_rows and which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param visible_dims: an array specifying the input dimensions to plot (maximum two)
:type visible_dims: a numpy array
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:type levels: int
:param samples: the number of a posteriori samples to plot, p(f*|y)
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
kw = {}
return models_plots.plot_data(self, which_data_rows,
which_data_ycols, visible_dims,
fignum, ax, data_symbol, **kw)
def errorbars_trainset(self, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], fignum=None, ax=None,
linecol=None, data_symbol='kx', predict_kw=None, plot_training_data=True,lw=None):
"""
Plot the posterior error bars corresponding to the training data
- For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.
Can plot only part of the data
using which_data_rows and which_data_ycols.
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_rows: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
kw = {}
if lw is not None:
kw['lw'] = lw
return models_plots.errorbars_trainset(self, which_data_rows, which_data_ycols, fixed_inputs,
fignum, ax, linecol, data_symbol,
predict_kw, plot_training_data, **kw)
def plot_magnification(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, legend=True,
plot_limits=None,
aspect='auto', updates=False, plot_inducing=True, kern=None, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_magnification(self, labels, which_indices,
resolution, ax, marker, s,
fignum, plot_inducing, legend,
plot_limits, aspect, updates, **kwargs)
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
""" """

View file

@ -32,7 +32,7 @@ class Bijective_mapping(Mapping):
also back from f to X. The inverse mapping is called g(). also back from f to X. The inverse mapping is called g().
""" """
def __init__(self, input_dim, output_dim, name='bijective_mapping'): def __init__(self, input_dim, output_dim, name='bijective_mapping'):
super(Bijective_apping, self).__init__(name=name) super(Bijective_mapping, self).__init__(name=name)
def g(self, f): def g(self, f):
"""Inverse mapping from output domain of the function to the inputs.""" """Inverse mapping from output domain of the function to the inputs."""

View file

@ -42,7 +42,7 @@ class Param(Parameterizable, ObsAr):
Multilevel indexing (e.g. self[:2][1:]) is not supported and might lead to unexpected behaviour. Multilevel indexing (e.g. self[:2][1:]) is not supported and might lead to unexpected behaviour.
Try to index in one go, using boolean indexing or the numpy builtin Try to index in one go, using boolean indexing or the numpy builtin
np.index function. np.index function.
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc. See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
""" """
@ -180,6 +180,7 @@ class Param(Parameterizable, ObsAr):
import copy import copy
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo)) Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s return s
def _setup_observers(self): def _setup_observers(self):
""" """
Setup the default observers Setup the default observers

View file

@ -74,7 +74,7 @@ class Parameterized(Parameterizable):
# Metaclass for parameters changed after init. # Metaclass for parameters changed after init.
# This makes sure, that parameters changed will always be called after __init__ # This makes sure, that parameters changed will always be called after __init__
# **Never** call parameters_changed() yourself # **Never** call parameters_changed() yourself
#This is ignored in Python 3 -- you need to put the meta class in the function definition. #This is ignored in Python 3 -- you need to put the meta class in the function definition.
#__metaclass__ = ParametersChangedMeta #__metaclass__ = ParametersChangedMeta
#The six module is used to support both Python 2 and 3 simultaneously #The six module is used to support both Python 2 and 3 simultaneously
#=========================================================================== #===========================================================================
@ -197,9 +197,10 @@ class Parameterized(Parameterizable):
raise RuntimeError("{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param))) raise RuntimeError("{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param)))
start = sum([p.size for p in self.parameters[:param._parent_index_]]) start = sum([p.size for p in self.parameters[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size self.size -= param.size
del self.parameters[param._parent_index_] del self.parameters[param._parent_index_]
self._remove_parameter_name(param)
param._disconnect_parent() param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers) param.remove_observer(self, self._pass_through_notify_observers)
@ -315,7 +316,7 @@ class Parameterized(Parameterizable):
param[:] = val; return param[:] = val; return
except AttributeError: except AttributeError:
pass pass
object.__setattr__(self, name, val); return object.__setattr__(self, name, val);
#=========================================================================== #===========================================================================
# Pickling # Pickling

View file

@ -366,6 +366,7 @@ class InverseGamma(Gamma):
def rvs(self, n): def rvs(self, n):
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n) return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
class DGPLVM_KFDA(Prior): class DGPLVM_KFDA(Prior):
""" """
Implementation of the Discriminative Gaussian Process Latent Variable function using Implementation of the Discriminative Gaussian Process Latent Variable function using
@ -512,6 +513,7 @@ class DGPLVM_KFDA(Prior):
self.A = self.compute_A(lst_ni) self.A = self.compute_A(lst_ni)
self.x_shape = x_shape self.x_shape = x_shape
class DGPLVM(Prior): class DGPLVM(Prior):
""" """
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
@ -669,7 +671,7 @@ class DGPLVM(Prior):
M_i = self.compute_Mi(cls) M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0) Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i) Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) # sb_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0] #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0] Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
@ -903,7 +905,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0] Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw)) return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function # This function calculates derivative of the log of prior function
@ -927,7 +929,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1) #Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0] #Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0] Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N) Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw) Sw_trans = np.transpose(Sw)
@ -1198,6 +1200,7 @@ class DGPLVM_T(Prior):
class HalfT(Prior): class HalfT(Prior):
""" """
Implementation of the half student t probability function, coupled with random variables. Implementation of the half student t probability function, coupled with random variables.
@ -1208,15 +1211,17 @@ class HalfT(Prior):
""" """
domain = _POSITIVE domain = _POSITIVE
_instances = [] _instances = []
def __new__(cls, A, nu): # Singleton:
def __new__(cls, A, nu): # Singleton:
if cls._instances: if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()] cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances: for instance in cls._instances:
if instance().A == A and instance().nu == nu: if instance().A == A and instance().nu == nu:
return instance() return instance()
o = super(Prior, cls).__new__(cls, A, nu) o = super(Prior, cls).__new__(cls, A, nu)
cls._instances.append(weakref.ref(o)) cls._instances.append(weakref.ref(o))
return cls._instances[-1]() return cls._instances[-1]()
def __init__(self, A, nu): def __init__(self, A, nu):
self.A = float(A) self.A = float(A)
self.nu = float(nu) self.nu = float(nu)
@ -1225,37 +1230,81 @@ class HalfT(Prior):
def __str__(self): def __str__(self):
return "hT({:.2g}, {:.2g})".format(self.A, self.nu) return "hT({:.2g}, {:.2g})".format(self.A, self.nu)
def lnpdf(self,theta): def lnpdf(self, theta):
return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) ) return (theta > 0) * (self.constant - .5*(self.nu + 1) * np.log(1. + (1./self.nu) * (theta/self.A)**2))
#theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) # theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
#lnpdfs = np.zeros_like(theta) # lnpdfs = np.zeros_like(theta)
#theta = np.array([theta]) # theta = np.array([theta])
#above_zero = theta.flatten()>1e-6 # above_zero = theta.flatten()>1e-6
#v = self.nu # v = self.nu
#sigma2=self.A # sigma2=self.A
#stop # stop
#lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5) # lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
# - gammaln(v * 0.5) # - gammaln(v * 0.5)
# - 0.5*np.log(sigma2 * v * np.pi) # - 0.5*np.log(sigma2 * v * np.pi)
# - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2)) # - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
#) # )
#return lnpdfs # return lnpdfs
def lnpdf_grad(self,theta): def lnpdf_grad(self, theta):
theta = theta if isinstance(theta,np.ndarray) else np.array([theta]) theta = theta if isinstance(theta, np.ndarray) else np.array([theta])
grad = np.zeros_like(theta) grad = np.zeros_like(theta)
above_zero = theta>1e-6 above_zero = theta > 1e-6
v = self.nu v = self.nu
sigma2=self.A sigma2 = self.A
grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2) grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2)
return grad return grad
def rvs(self, n): def rvs(self, n):
#return np.random.randn(n) * self.sigma + self.mu # return np.random.randn(n) * self.sigma + self.mu
from scipy.stats import t from scipy.stats import t
#[np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)]) # [np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
ret = t.rvs(self.nu,loc=0,scale=self.A, size=n) ret = t.rvs(self.nu, loc=0, scale=self.A, size=n)
ret[ret<0] = 0 ret[ret < 0] = 0
return ret return ret
class Exponential(Prior):
"""
Implementation of the Exponential probability function,
coupled with random variables.
:param l: shape parameter
"""
domain = _POSITIVE
_instances = []
def __new__(cls, l): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().l == l:
return instance()
o = super(Exponential, cls).__new__(cls, l)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, l):
self.l = l
def __str__(self):
return "Exp({:.2g})".format(self.l)
def summary(self):
ret = {"E[x]": 1. / self.l,
"E[ln x]": np.nan,
"var[x]": 1. / self.l**2,
"Entropy": 1. - np.log(self.l),
"Mode": 0.}
return ret
def lnpdf(self, x):
return np.log(self.l) - self.l * x
def lnpdf_grad(self, x):
return - self.l
def rvs(self, n):
return np.random.exponential(scale=self.l, size=n)

View file

@ -62,7 +62,7 @@ class Transformation(object):
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from ...plotting.matplot_dep import base_plots from ...plotting.matplot_dep import base_plots
x = np.linspace(-8,8) x = np.linspace(-8,8)
base_plots.meanplot(x, self.f(x),axes=axes*args,**kw) base_plots.meanplot(x, self.f(x), *args, ax=axes, **kw)
axes = plt.gca() axes = plt.gca()
axes.set_xlabel(xlabel) axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel) axes.set_ylabel(ylabel)

View file

@ -49,7 +49,7 @@ class SparseGP(GP):
else: else:
#inference_method = ?? #inference_method = ??
raise NotImplementedError("what to do what to do?") raise NotImplementedError("what to do what to do?")
print("defaulting to ", inference_method, "for latent function inference") print(("defaulting to ", inference_method, "for latent function inference"))
self.Z = Param('inducing inputs', Z) self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0] self.num_inducing = Z.shape[0]
@ -60,6 +60,10 @@ class SparseGP(GP):
self.link_parameter(self.Z, index=0) self.link_parameter(self.Z, index=0)
self.posterior = None self.posterior = None
@property
def _predictive_variable(self):
return self.Z
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
@ -114,63 +118,66 @@ class SparseGP(GP):
Make a prediction for the latent function values. Make a prediction for the latent function values.
For certain inputs we give back a full_cov of shape NxN, For certain inputs we give back a full_cov of shape NxN,
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of,
we take only the diagonal elements across N. we take only the diagonal elements across N.
For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we For uncertain inputs, the SparseGP bound produces cannot predict the full covariance matrix full_cov for now.
return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD). The implementation of that will follow. However, for each dimension the
This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'. covariance changes, so if full_cov is False (standard), we return the variance
for each dimension [NxD].
""" """
if kern is None: kern = self.kern if kern is None: kern = self.kern
if not isinstance(Xnew, VariationalPosterior): if not isinstance(Xnew, VariationalPosterior):
Kx = kern.K(self.Z, Xnew) # Kx = kern.K(self._predictive_variable, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector) # mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov: # if full_cov:
Kxx = kern.K(Xnew) # Kxx = kern.K(Xnew)
if self.posterior.woodbury_inv.ndim == 2: # if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) # var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3: # elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2])) # var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[2]): # for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx)) # var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var # var = var
else: # else:
Kxx = kern.Kdiag(Xnew) # Kxx = kern.Kdiag(Xnew)
if self.posterior.woodbury_inv.ndim == 2: # if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None] # var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3: # elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2])) # var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]): # for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0))) # var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var # var = var
#add in the mean function # #add in the mean function
if self.mean_function is not None: # if self.mean_function is not None:
mu += self.mean_function.f(Xnew) # mu += self.mean_function.f(Xnew)
mu, var = super(SparseGP, self)._raw_predict(Xnew, full_cov, kern)
else: else:
psi0_star = kern.psi0(self.Z, Xnew) psi0_star = kern.psi0(self._predictive_variable, Xnew)
psi1_star = kern.psi1(self.Z, Xnew) psi1_star = kern.psi1(self._predictive_variable, Xnew)
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code. #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
la = self.posterior.woodbury_vector la = self.posterior.woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions? mu = np.dot(psi1_star, la) # TODO: dimensions?
if full_cov: if full_cov:
raise NotImplementedError("Full covariance for Sparse GP predicted with uncertain inputs not implemented yet.")
var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1])) var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
di = np.diag_indices(la.shape[1]) di = np.diag_indices(la.shape[1])
else: else:
var = np.empty((Xnew.shape[0], la.shape[1])) var = np.empty((Xnew.shape[0], la.shape[1]))
for i in range(Xnew.shape[0]): for i in range(Xnew.shape[0]):
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]] _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var)) psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]])) tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
var_ = mdot(la.T, tmp, la) var_ = mdot(la.T, tmp, la)
p0 = psi0_star[i] p0 = psi0_star[i]
t = np.atleast_3d(self.posterior.woodbury_inv) t = np.atleast_3d(self.posterior.woodbury_inv)
t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2) t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
if full_cov: if full_cov:
var_[di] += p0 var_[di] += p0
var_[di] += -t2 var_[di] += -t2

View file

@ -34,7 +34,7 @@ class SparseGP_MPI(SparseGP):
""" """
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False): def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp', Y_metadata=None, mpi_comm=None, normalizer=False):
self._IN_OPTIMIZATION_ = False self._IN_OPTIMIZATION_ = False
if mpi_comm != None: if mpi_comm != None:
if inference_method is None: if inference_method is None:

View file

@ -1,11 +1,11 @@
# Copyright (c) 2014, James Hensman, Alex Matthews # Copyright (c) 2014, James Hensman, Alex Matthews
# Distributed under the terms of the GNU General public License, see LICENSE.txt # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..util import choleskies from ..util import choleskies
from .sparse_gp import SparseGP from .sparse_gp import SparseGP
from .parameterization.param import Param from .parameterization.param import Param
from ..inference.latent_function_inference import SVGP as svgp_inf from ..inference.latent_function_inference.svgp import SVGP as svgp_inf
class SVGP(SparseGP): class SVGP(SparseGP):

View file

@ -24,7 +24,6 @@ class VerboseOptimization(object):
self.model.add_observer(self, self.print_status) self.model.add_observer(self, self.print_status)
self.status = 'running' self.status = 'running'
self.clear = clear_after_finish self.clear = clear_after_finish
self.deltat = .2
self.update() self.update()
@ -80,6 +79,7 @@ class VerboseOptimization(object):
def __enter__(self): def __enter__(self):
self.start = time.time() self.start = time.time()
self._time = self.start
return self return self
def print_out(self, seconds): def print_out(self, seconds):
@ -143,12 +143,12 @@ class VerboseOptimization(object):
def print_status(self, me, which=None): def print_status(self, me, which=None):
self.update() self.update()
seconds = time.time()-self.start t = time.time()
seconds = t-self.start
#sys.stdout.write(" "*len(self.message)) #sys.stdout.write(" "*len(self.message))
self.deltat += seconds if t-self._time > .3 or seconds < .3:
if self.deltat > .2:
self.print_out(seconds) self.print_out(seconds)
self.deltat = 0 self._time = t
self.iteration += 1 self.iteration += 1

View file

@ -3,7 +3,7 @@
import numpy as np import numpy as np
try: try:
import pylab as pb from matplotlib import pyplot as pb
except: except:
pass pass
import GPy import GPy

View file

@ -77,7 +77,7 @@ def student_t_approx(optimize=True, plot=True):
debug=True debug=True
if debug: if debug:
m4.optimize(messages=1) m4.optimize(messages=1)
import pylab as pb from matplotlib import pyplot as pb
pb.plot(m4.X, m4.inference_method.f_hat) pb.plot(m4.X, m4.inference_method.f_hat)
pb.plot(m4.X, m4.Y, 'rx') pb.plot(m4.X, m4.Y, 'rx')
m4.plot() m4.plot()

View file

@ -5,7 +5,7 @@
Gaussian Processes regression examples Gaussian Processes regression examples
""" """
try: try:
import pylab as pb from matplotlib import pyplot as pb
except: except:
pass pass
import numpy as np import numpy as np

View file

@ -69,7 +69,7 @@ from .expectation_propagation_dtc import EPDTC
from .dtc import DTC from .dtc import DTC
from .fitc import FITC from .fitc import FITC
from .var_dtc_parallel import VarDTC_minibatch from .var_dtc_parallel import VarDTC_minibatch
#from .svgp import SVGP from .var_gauss import VarGauss
# class FullLatentFunctionData(object): # class FullLatentFunctionData(object):
# #

View file

@ -4,6 +4,8 @@
import numpy as np import numpy as np
from ...core import Model from ...core import Model
from ...core.parameterization import variational from ...core.parameterization import variational
from ...util.linalg import tdot
from GPy.core.parameterization.variational import VariationalPosterior
def infer_newX(model, Y_new, optimize=True, init='L2'): def infer_newX(model, Y_new, optimize=True, init='L2'):
""" """
@ -60,18 +62,19 @@ class InferenceX(Model):
# self.kern.GPU(True) # self.kern.GPU(True)
from copy import deepcopy from copy import deepcopy
self.posterior = deepcopy(model.posterior) self.posterior = deepcopy(model.posterior)
if hasattr(model, 'variational_prior'): from ...core.parameterization.variational import VariationalPosterior
if isinstance(model.X, VariationalPosterior):
self.uncertain_input = True self.uncertain_input = True
from ...models.ss_gplvm import IBPPrior from ...models.ss_gplvm import IBPPrior
from ...models.ss_mrd import IBPPrior_SSMRD from ...models.ss_mrd import IBPPrior_SSMRD
if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD): if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
from ...core.parameterization.variational import SpikeAndSlabPrior from ...core.parameterization.variational import SpikeAndSlabPrior
self.variational_prior = SpikeAndSlabPrior(pi=05,learnPi=False, group_spike=False) self.variational_prior = SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
else: else:
self.variational_prior = model.variational_prior.copy() self.variational_prior = model.variational_prior.copy()
else: else:
self.uncertain_input = False self.uncertain_input = False
if hasattr(model, 'inducing_inputs'): if hasattr(model, 'Z'):
self.sparse_gp = True self.sparse_gp = True
self.Z = model.Z.copy() self.Z = model.Z.copy()
else: else:
@ -125,13 +128,13 @@ class InferenceX(Model):
wv = wv[:,self.valid_dim] wv = wv[:,self.valid_dim]
output_dim = self.valid_dim.sum() output_dim = self.valid_dim.sum()
if self.ninan is not None: if self.ninan is not None:
self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - np.einsum('md,od->mo',wv, wv)[:, :, None]).sum(-1) self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - tdot(wv)[:, :, None]).sum(-1)
else: else:
self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv)) self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - tdot(wv))
self.dL_dpsi1 = beta*np.dot(self.Y[:,self.valid_dim], wv.T) self.dL_dpsi1 = beta*np.dot(self.Y[:,self.valid_dim], wv.T)
self.dL_dpsi0 = - beta/2.* np.ones(self.Y.shape[0]) self.dL_dpsi0 = - beta/2.* np.ones(self.Y.shape[0])
else: else:
self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2. self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - tdot(wv))/2. #np.einsum('md,od->mo',wv, wv)
self.dL_dpsi1 = beta*np.dot(self.Y, wv.T) self.dL_dpsi1 = beta*np.dot(self.Y, wv.T)
self.dL_dpsi0 = -beta/2.*output_dim* np.ones(self.Y.shape[0]) self.dL_dpsi0 = -beta/2.*output_dim* np.ones(self.Y.shape[0])

View file

@ -172,6 +172,7 @@ class Laplace(LatentFunctionInference):
def obj(Ki_f, f): def obj(Ki_f, f):
ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata)) ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
if np.isnan(ll): if np.isnan(ll):
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return -np.inf return -np.inf
else: else:
return ll return ll

View file

@ -64,9 +64,7 @@ class VarDTC(LatentFunctionInference):
def get_VVTfactor(self, Y, prec): def get_VVTfactor(self, Y, prec):
return Y * prec # TODO chache this, and make it effective return Y * prec # TODO chache this, and make it effective
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
_, output_dim = Y.shape _, output_dim = Y.shape
uncertain_inputs = isinstance(X, VariationalPosterior) uncertain_inputs = isinstance(X, VariationalPosterior)
@ -95,17 +93,28 @@ class VarDTC(LatentFunctionInference):
# The rather complex computations of A, and the psi stats # The rather complex computations of A, and the psi stats
if uncertain_inputs: if uncertain_inputs:
psi0 = kern.psi0(Z, X) if psi0 is None:
psi1 = kern.psi1(Z, X) psi0 = kern.psi0(Z, X)
if psi1 is None:
psi1 = kern.psi1(Z, X)
if het_noise: if het_noise:
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0) if psi2 is None:
assert len(psi2.shape) == 3 # Need to have not summed out N
#FIXME: Need testing
psi2_beta = np.sum([psi2[X[i:i+1,:], :, :] * beta_i for i,beta_i in enumerate(beta)],0)
else:
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
else: else:
psi2_beta = kern.psi2(Z,X) * beta if psi2 is None:
psi2 = kern.psi2(Z,X)
psi2_beta = psi2 * beta
LmInv = dtrtri(Lm) LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T)) A = LmInv.dot(psi2_beta.dot(LmInv.T))
else: else:
psi0 = kern.Kdiag(X) if psi0 is None:
psi1 = kern.K(X, Z) psi0 = kern.Kdiag(X)
if psi1 is None:
psi1 = kern.K(X, Z)
if het_noise: if het_noise:
tmp = psi1 * (np.sqrt(beta)) tmp = psi1 * (np.sqrt(beta))
else: else:

View file

@ -172,18 +172,23 @@ class VarDTC_minibatch(LatentFunctionInference):
if not np.isfinite(Kmm).all(): if not np.isfinite(Kmm).all():
print(Kmm) print(Kmm)
Lm = jitchol(Kmm) Lm = jitchol(Kmm)
LmInv = dtrtri(Lm)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') LmInvPsi2LmInvT = LmInv.dot(psi2_full.dot(LmInv.T))
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda) LL = jitchol(Lambda)
LLInv = dtrtri(LL)
logdet_L = 2.*np.sum(np.log(np.diag(LL))) logdet_L = 2.*np.sum(np.log(np.diag(LL)))
b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0] LmLLInv = LLInv.dot(LmInv)
b = psi1Y_full.dot(LmLLInv.T)
bbt = np.square(b).sum() bbt = np.square(b).sum()
v = dtrtrs(Lm,dtrtrs(LL,b,trans=1)[0],trans=1)[0] v = b.dot(LmLLInv).T
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
tmp = -backsub_both_sides(LL, tdot(b)+output_dim*np.eye(input_dim), transpose='left')
dL_dpsi2R = backsub_both_sides(Lm, tmp+output_dim*np.eye(input_dim), transpose='left')/2. tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.
# Cache intermediate results # Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v self.midRes['v'] = v
@ -201,7 +206,7 @@ class VarDTC_minibatch(LatentFunctionInference):
# Compute dL_dKmm # Compute dL_dKmm
#====================================================================== #======================================================================
dL_dKmm = dL_dpsi2R - output_dim*backsub_both_sides(Lm, LmInvPsi2LmInvT, transpose='left')/2. dL_dKmm = dL_dpsi2R - output_dim*LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.
#====================================================================== #======================================================================
# Compute the Posterior distribution of inducing points p(u|Y) # Compute the Posterior distribution of inducing points p(u|Y)

View file

@ -0,0 +1,69 @@
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv
from .posterior import Posterior
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class VarGauss(LatentFunctionInference):
"""
The Variational Gaussian Approximation revisited
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
author = {Opper, Manfred and Archambeau, C{\'e}dric},
journal = {Neural Comput.},
year = {2009},
pages = {786--792},
}
"""
def __init__(self, alpha, beta):
"""
:param alpha: GPy.core.Param varational parameter
:param beta: GPy.core.Param varational parameter
"""
self.alpha, self.beta = alpha, beta
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
if mean_function is not None:
raise NotImplementedError
num_data, output_dim = Y.shape
assert output_dim ==1, "Only one output supported"
K = kern.K(X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, m, var, Y_metadata=Y_metadata)
if dF_dthetaL is not None:
dL_dthetaL = dF_dthetaL.sum(1).sum(1)
else:
dL_dthetaL = np.array([])
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
#dF_db_ = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
dF_db = -2*np.sum(Sigma**2 * (dF_dv * self.beta), 0)
#assert np.allclose(dF_db, dF_db_)
KL = 0.5*(Alogdet + np.trace(Ai) - num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
log_marginal = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
return Posterior(mean=m, cov=Sigma ,K=K),\
log_marginal,\
{'dL_dK':dF_dK-dKL_dK, 'dL_dthetaL':dL_dthetaL}

View file

@ -1 +1,2 @@
from .hmc import HMC from .hmc import HMC
from .samplers import *

View file

@ -1,14 +1,10 @@
# ## Copyright (c) 2014, Zhenwen Dai # ## Copyright (c) 2014, Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from __future__ import print_function
import numpy as np import numpy as np
from scipy import linalg, optimize
import Tango
import sys import sys
import re
import numdifftools as ndt
import pdb
try: try:
#In Python 2, cPickle is faster. It does not exist in Python 3 but the underlying code is always used #In Python 2, cPickle is faster. It does not exist in Python 3 but the underlying code is always used
@ -22,11 +18,11 @@ class Metropolis_Hastings:
def __init__(self,model,cov=None): def __init__(self,model,cov=None):
"""Metropolis Hastings, with tunings according to Gelman et al. """ """Metropolis Hastings, with tunings according to Gelman et al. """
self.model = model self.model = model
current = self.model._get_params_transformed() current = self.model.optimizer_array
self.D = current.size self.D = current.size
self.chains = [] self.chains = []
if cov is None: if cov is None:
self.cov = model.Laplace_covariance() self.cov = np.eye(self.D)
else: else:
self.cov = cov self.cov = cov
self.scale = 2.4/np.sqrt(self.D) self.scale = 2.4/np.sqrt(self.D)
@ -37,20 +33,20 @@ class Metropolis_Hastings:
if start is None: if start is None:
self.model.randomize() self.model.randomize()
else: else:
self.model._set_params_transformed(start) self.model.optimizer_array = start
def sample(self, Ntotal=10000, Nburn=1000, Nthin=10, tune=True, tune_throughout=False, tune_interval=400):
current = self.model.optimizer_array
def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400): fcurrent = self.model.log_likelihood() + self.model.log_prior() + \
current = self.model._get_params_transformed() self.model._log_det_jacobian()
fcurrent = self.model.log_likelihood() + self.model.log_prior()
accepted = np.zeros(Ntotal,dtype=np.bool) accepted = np.zeros(Ntotal,dtype=np.bool)
for it in range(Ntotal): for it in range(Ntotal):
print("sample %d of %d\r"%(it,Ntotal), end=' ') print("sample %d of %d\r"%(it,Ntotal),end="\t")
sys.stdout.flush() sys.stdout.flush()
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale) prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
self.model._set_params_transformed(prop) self.model.optimizer_array = prop
fprop = self.model.log_likelihood() + self.model.log_prior() fprop = self.model.log_likelihood() + self.model.log_prior() + \
self.model._log_det_jacobian()
if fprop>fcurrent:#sample accepted, going 'uphill' if fprop>fcurrent:#sample accepted, going 'uphill'
accepted[it] = True accepted[it] = True
@ -78,10 +74,11 @@ class Metropolis_Hastings:
def predict(self,function,args): def predict(self,function,args):
"""Make a prediction for the function, to which we will pass the additional arguments""" """Make a prediction for the function, to which we will pass the additional arguments"""
param = self.model._get_params() param = self.model.param_array
fs = [] fs = []
for p in self.chain: for p in self.chain:
self.model._set_params(p) self.model.param_array = p
fs.append(function(*args)) fs.append(function(*args))
self.model._set_params(param)# reset model to starting state # reset model to starting state
self.model.param_array = param
return fs return fs

View file

@ -5,7 +5,7 @@ class StochasticStorage(object):
''' '''
This is a container for holding the stochastic parameters, This is a container for holding the stochastic parameters,
such as subset indices or step length and so on. such as subset indices or step length and so on.
self.d has to be a list of lists: self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions] [dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.10 so that the minibatches can be used as efficiently as possible.10
@ -38,16 +38,17 @@ class SparseGPMissing(StochasticStorage):
import numpy as np import numpy as np
self.Y = model.Y_normalized self.Y = model.Y_normalized
bdict = {} bdict = {}
#For N > 1000 array2string default crops
opt = np.get_printoptions()
np.set_printoptions(threshold=np.inf)
for d in range(self.Y.shape[1]): for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y[:, d]) inan = np.isnan(self.Y)[:, d]
arr_str = np.array2string(inan, arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'})
try: try:
bdict[arr_str][0].append(d) bdict[arr_str][0].append(d)
except: except:
bdict[arr_str] = [[d], ~inan] bdict[arr_str] = [[d], ~inan]
np.set_printoptions(**opt)
self.d = bdict.values() self.d = bdict.values()
class SparseGPStochastics(StochasticStorage): class SparseGPStochastics(StochasticStorage):
@ -55,32 +56,36 @@ class SparseGPStochastics(StochasticStorage):
For the sparse gp we need to store the dimension we are in, For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those and the indices corresponding to those
""" """
def __init__(self, model, batchsize=1): def __init__(self, model, batchsize=1, missing_data=True):
self.batchsize = batchsize self.batchsize = batchsize
self.output_dim = model.Y.shape[1] self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized self.Y = model.Y_normalized
self.missing_data = missing_data
self.reset() self.reset()
self.do_stochastics() self.do_stochastics()
def do_stochastics(self): def do_stochastics(self):
import numpy as np
if self.batchsize == 1: if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]] self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
else: else:
import numpy as np
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False) self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {} bdict = {}
for d in self.d: if self.missing_data:
inan = np.isnan(self.Y[:, d]) opt = np.get_printoptions()
arr_str = int(np.array2string(inan, np.set_printoptions(threshold=np.inf)
np.inf, 0, for d in self.d:
True, '', inan = np.isnan(self.Y[:, d])
formatter={'bool':lambda x: '1' if x else '0'}), 2) arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
try: try:
bdict[arr_str][0].append(d) bdict[arr_str][0].append(d)
except: except:
bdict[arr_str] = [[d], ~inan] bdict[arr_str] = [[d], ~inan]
self.d = bdict.values() np.set_printoptions(**opt)
self.d = bdict.values()
else:
self.d = [[self.d, None]]
def reset(self): def reset(self):
self.current_dim = -1 self.current_dim = -1

View file

@ -6,6 +6,7 @@ from ._src.brownian import Brownian
from ._src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine from ._src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from ._src.mlp import MLP from ._src.mlp import MLP
from ._src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from ._src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from ._src.standard_periodic import StdPeriodic
from ._src.independent_outputs import IndependentOutputs, Hierarchical from ._src.independent_outputs import IndependentOutputs, Hierarchical
from ._src.coregionalize import Coregionalize from ._src.coregionalize import Coregionalize
from ._src.ODE_UY import ODE_UY from ._src.ODE_UY import ODE_UY
@ -17,7 +18,7 @@ from ._src.eq_ode2 import EQ_ODE2
from ._src.trunclinear import TruncLinear,TruncLinear_inf from ._src.trunclinear import TruncLinear,TruncLinear_inf
from ._src.splitKern import SplitKern,DEtime from ._src.splitKern import SplitKern,DEtime
from ._src.splitKern import DEtime as DiffGenomeKern from ._src.splitKern import DEtime as DiffGenomeKern
from ._src.spline import Spline
from ._src.eq_ode2 import EQ_ODE2
from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

View file

@ -14,7 +14,7 @@ class Add(CombinationKernel):
This kernel will take over the active dims of it's subkernels passed in. This kernel will take over the active dims of it's subkernels passed in.
""" """
def __init__(self, subkerns, name='add'): def __init__(self, subkerns, name='sum'):
for i, kern in enumerate(subkerns[:]): for i, kern in enumerate(subkerns[:]):
if isinstance(kern, Add): if isinstance(kern, Add):
del subkerns[i] del subkerns[i]
@ -71,16 +71,29 @@ class Add(CombinationKernel):
target = np.zeros(X.shape) target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target return target
@Cache_this(limit=2, force_kwargs=['which_parts']) def gradients_XX(self, dL_dK, X, X2):
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0], X.shape[1]))
else:
target = np.zeros((X.shape[0], X2.shape[0], X.shape[1]))
[target.__iadd__(p.gradients_XX(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_XX_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_XX_diag(dL_dKdiag, X)) for p in self.parts]
return target
@Cache_this(limit=1, force_kwargs=['which_parts'])
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts)) return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=1, force_kwargs=['which_parts'])
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts)) return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts']) @Cache_this(limit=1, force_kwargs=['which_parts'])
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts)) psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2 #return psi2
@ -115,6 +128,41 @@ class Add(CombinationKernel):
raise NotImplementedError("psi2 cannot be computed for this kernel") raise NotImplementedError("psi2 cannot be computed for this kernel")
return psi2 return psi2
@Cache_this(limit=1, force_kwargs=['which_parts'])
def psi2n(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2n(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from .static import White, Bias
from .rbf import RBF
#from rbf_inv import RBFInv
from .linear import Linear
#ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn;t combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior).sum(axis=0)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior).sum(axis=0)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += np.einsum('nm,no->nmo',tmp1,tmp2)+np.einsum('nm,no->nmo',tmp2,tmp1)
#(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError("psi2 cannot be computed for this kernel")
return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from .static import White, Bias from .static import White, Bias
for p1 in self.parts: for p1 in self.parts:
@ -126,9 +174,9 @@ class Add(CombinationKernel):
if isinstance(p2, White): if isinstance(p2, White):
continue continue
elif isinstance(p2, Bias): elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@ -143,9 +191,9 @@ class Add(CombinationKernel):
if isinstance(p2, White): if isinstance(p2, White):
continue continue
elif isinstance(p2, Bias): elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target return target
@ -161,9 +209,9 @@ class Add(CombinationKernel):
if isinstance(p2, White): if isinstance(p2, White):
continue continue
elif isinstance(p2, Bias): elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))] [np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
return target_grads return target_grads

View file

@ -11,7 +11,7 @@ class BasisFuncKernel(Kern):
def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'): def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
""" """
Abstract superclass for kernels with explicit basis functions for use in GPy. Abstract superclass for kernels with explicit basis functions for use in GPy.
This class does NOT automatically add an offset to the design matrix phi! This class does NOT automatically add an offset to the design matrix phi!
""" """
super(BasisFuncKernel, self).__init__(input_dim, active_dims, name) super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
@ -23,24 +23,24 @@ class BasisFuncKernel(Kern):
variance = np.array(variance) variance = np.array(variance)
self.variance = Param('variance', variance, Logexp()) self.variance = Param('variance', variance, Logexp())
self.link_parameter(self.variance) self.link_parameter(self.variance)
def parameters_changed(self): def parameters_changed(self):
self.alpha = np.sqrt(self.variance) self.alpha = np.sqrt(self.variance)
self.beta = 1./self.variance self.beta = 1./self.variance
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def phi(self, X): def phi(self, X):
return self._phi(X) return self._phi(X)
def _phi(self, X): def _phi(self, X):
raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi') raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
def K(self, X, X2=None): def K(self, X, X2=None):
return self._K(X, X2) return self._K(X, X2)
def Kdiag(self, X, X2=None): def Kdiag(self, X, X2=None):
return np.diag(self._K(X, X2)) return np.diag(self._K(X, X2))
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
if self.ARD: if self.ARD:
phi1 = self.phi(X) phi1 = self.phi(X)
@ -51,22 +51,22 @@ class BasisFuncKernel(Kern):
self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2) self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
else: else:
self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
if self.ARD: if self.ARD:
phi1 = self.phi(X) phi1 = self.phi(X)
self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1) self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
else: else:
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
def concatenate_offset(self, X): def concatenate_offset(self, X):
return np.c_[np.ones((X.shape[0], 1)), X] return np.c_[np.ones((X.shape[0], 1)), X]
def posterior_inf(self, X=None, posterior=None): def posterior_inf(self, X=None, posterior=None):
""" """
Do the posterior inference on the parameters given this kernels functions Do the posterior inference on the parameters given this kernels functions
and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model. and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model.
If not given we search for the the highest parent to be a model, containing the posterior, and for X accordingly. If not given we search for the the highest parent to be a model, containing the posterior, and for X accordingly.
""" """
if X is None: if X is None:
try: try:
@ -80,7 +80,7 @@ class BasisFuncKernel(Kern):
raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference") raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
phi_alpha = self.phi(X) * self.variance phi_alpha = self.phi(X) * self.variance
return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha)) return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _K(self, X, X2): def _K(self, X, X2):
if X2 is None or X is X2: if X2 is None or X is X2:
@ -95,35 +95,35 @@ class BasisFuncKernel(Kern):
phi1 = phi1[:, None] phi1 = phi1[:, None]
phi2 = phi2[:, None] phi2 = phi2[:, None]
return phi1.dot(phi2.T) return phi1.dot(phi2.T)
class LinearSlopeBasisFuncKernel(BasisFuncKernel): class LinearSlopeBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'): def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
""" """
A linear segment transformation. The segments start at start, \ A linear segment transformation. The segments start at start, \
are then linear to stop and constant again. The segments are are then linear to stop and constant again. The segments are
normalized, so that they have exactly as much mass above normalized, so that they have exactly as much mass above
as below the origin. as below the origin.
Start and stop can be tuples or lists of starts and stops. Start and stop can be tuples or lists of starts and stops.
Behaviour of start stop is as np.where(X<start) would do. Behaviour of start stop is as np.where(X<start) would do.
""" """
self.start = np.array(start) self.start = np.array(start)
self.stop = np.array(stop) self.stop = np.array(stop)
super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name) super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _phi(self, X): def _phi(self, X):
phi = np.where(X < self.start, self.start, X) phi = np.where(X < self.start, self.start, X)
phi = np.where(phi > self.stop, self.stop, phi) phi = np.where(phi > self.stop, self.stop, phi)
return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1. return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
class ChangePointBasisFuncKernel(BasisFuncKernel): class ChangePointBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'): def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
self.changepoint = np.array(changepoint) self.changepoint = np.array(changepoint)
super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name) super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _phi(self, X): def _phi(self, X):
return np.where((X < self.changepoint), -1, 1) return np.where((X < self.changepoint), -1, 1)
@ -131,7 +131,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
class DomainKernel(LinearSlopeBasisFuncKernel): class DomainKernel(LinearSlopeBasisFuncKernel):
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'): def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name) super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _phi(self, X): def _phi(self, X):
phi = np.where((X>self.start)*(X<self.stop), 1, 0) phi = np.where((X>self.start)*(X<self.stop), 1, 0)
@ -147,7 +147,7 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
self.slope = Param('slope', slope, Logexp()) self.slope = Param('slope', slope, Logexp())
super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name) super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
self.link_parameter(self.slope) self.link_parameter(self.slope)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _phi(self, X): def _phi(self, X):
import scipy as sp import scipy as sp
@ -156,7 +156,7 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
def parameters_changed(self): def parameters_changed(self):
BasisFuncKernel.parameters_changed(self) BasisFuncKernel.parameters_changed(self)
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2) super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
if X2 is None or X is X2: if X2 is None or X is X2:

View file

@ -6,7 +6,11 @@ import numpy as np
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assesing whether to use cython from ...util.config import config # for assesing whether to use cython
from . import coregionalize_cython try:
from . import coregionalize_cython
config.set('cython', 'working', 'True')
except ImportError:
config.set('cython', 'working', 'False')
class Coregionalize(Kern): class Coregionalize(Kern):
""" """
@ -94,7 +98,7 @@ class Coregionalize(Kern):
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2) dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small) dkappa = np.diag(dL_dK_small).copy()
dL_dK_small += dL_dK_small.T dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0) dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
@ -111,7 +115,7 @@ class Coregionalize(Kern):
return dL_dK_small return dL_dK_small
def _gradient_reduce_cython(self, dL_dK, index, index2): def _gradient_reduce_cython(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0] index, index2 = np.int64(index[:,0]), np.int64(index2[:,0])
return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2) return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
@ -126,4 +130,3 @@ class Coregionalize(Kern):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)

File diff suppressed because it is too large Load diff

View file

@ -1,33 +1,37 @@
#cython: boundscheck=True #cython: boundscheck=False
#cython: wraparound=True #cython: wraparound=False
#cython: nonecheck=False
import cython import cython
import numpy as np import numpy as np
cimport numpy as np cimport numpy as np
def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X): def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
cdef int N = X.size cdef int N = X.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N)) cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, N))
for n in range(N): with nogil:
for m in range(N): for n in range(N):
K[n,m] = B[X[n],X[m]] for m in range(N):
K[n, m] = B[X[n], X[m]]
return K return K
def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2): def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
cdef int N = X.size cdef int N = X.size
cdef int M = X2.size cdef int M = X2.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M)) cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, M))
for n in range(N): with nogil:
for m in range(M): for n in range(N):
K[n,m] = B[X[n],X2[m]] for m in range(M):
K[n, m] = B[X[n], X2[m]]
return K return K
def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2): def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D)) cdef np.ndarray[np.double_t, ndim=2, mode='c'] dL_dK_small = np.zeros((D, D))
cdef int N = index.size cdef int N = index.size
cdef int M = index2.size cdef int M = index2.size
for i in range(N): with nogil:
for j in range(M): for i in range(N):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j]; for j in range(M):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
return dL_dK_small return dL_dK_small

View file

@ -105,7 +105,7 @@ class IndependentOutputs(CombinationKernel):
if X2 is None: if X2 is None:
# TODO: make use of index_to_slices # TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out # FIXME: Broken as X is already sliced out
print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!") # print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
values = np.unique(X[:,self.index_dim]) values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values] slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))

View file

@ -58,20 +58,9 @@ class Kern(Parameterized):
self._sliced_X = 0 self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
@property from .psi_comp import PSICOMP_GH
def return_psi2_n(self): self.psicomp = PSICOMP_GH()
"""
Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
"""
return self._return_psi2_n_flag[0]
@return_psi2_n.setter
def return_psi2_n(self, val):
def visit(self):
if isinstance(self, Kern):
self._return_psi2_n_flag[0]=val
self.traverse(visit)
@Cache_this(limit=20) @Cache_this(limit=20)
def _slice_X(self, X): def _slice_X(self, X):
@ -81,6 +70,9 @@ class Kern(Parameterized):
""" """
Compute the kernel function. Compute the kernel function.
.. math::
K_{ij} = k(X_i, X_j)
:param X: the first set of inputs to the kernel :param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2 :param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed throgh to the 'part' object, which is None, this is passed throgh to the 'part' object, which
@ -88,16 +80,64 @@ class Kern(Parameterized):
""" """
raise NotImplementedError raise NotImplementedError
def Kdiag(self, X): def Kdiag(self, X):
"""
The diagonal of the kernel matrix K
.. math::
Kdiag_{i} = k(X_i, X_i)
"""
raise NotImplementedError raise NotImplementedError
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
raise NotImplementedError """
.. math::
\psi_0 = \sum_{i=0}^{n}E_{q(X)}[k(X_i, X_i)]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
raise NotImplementedError """
.. math::
\psi_1^{n,m} = E_{q(X)}[k(X_n, Z_m)]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
raise NotImplementedError """
.. math::
\psi_2^{m,m'} = \sum_{i=0}^{n}E_{q(X)}[ k(Z_m, X_i) k(X_i, Z_{m'})]
"""
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
def psi2n(self, Z, variational_posterior):
"""
.. math::
\psi_2^{n,m,m'} = E_{q(X)}[ k(Z_m, X_n) k(X_n, Z_{m'})]
Thus, we do not sum out n, compared to psi2
"""
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
def gradients_X(self, dL_dK, X, X2): def gradients_X(self, dL_dK, X, X2):
"""
.. math::
\\frac{\partial L}{\partial X} = \\frac{\partial L}{\partial K}\\frac{\partial K}{\partial X}
"""
raise NotImplementedError raise NotImplementedError
def gradients_X_X2(self, dL_dK, X, X2):
return self.gradients_X(dL_dK, X, X2), self.gradients_X(dL_dK.T, X2, X)
def gradients_XX(self, dL_dK, X, X2):
"""
.. math::
\\frac{\partial^2 L}{\partial X\partial X_2} = \\frac{\partial L}{\partial K}\\frac{\partial^2 K}{\partial X\partial X_2}
"""
raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dKdiag, X):
"""
The diagonal of the second derivative w.r.t. X and X2
"""
raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
"""
The diagonal of the derivative w.r.t. X
"""
raise NotImplementedError raise NotImplementedError
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
@ -113,27 +153,35 @@ class Kern(Parameterized):
Set the gradients of all parameters when doing inference with Set the gradients of all parameters when doing inference with
uncertain inputs, using expectations of the kernel. uncertain inputs, using expectations of the kernel.
The esential maths is The essential maths is
dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} + .. math::
dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i} \\frac{\partial L}{\partial \\theta_i} & = \\frac{\partial L}{\partial \psi_0}\\frac{\partial \psi_0}{\partial \\theta_i}\\
& \quad + \\frac{\partial L}{\partial \psi_1}\\frac{\partial \psi_1}{\partial \\theta_i}\\
& \quad + \\frac{\partial L}{\partial \psi_2}\\frac{\partial \psi_2}{\partial \\theta_i}
Thus, we push the different derivatives through the gradients of the psi
statistics. Be sure to set the gradients for all kernel
parameters here.
""" """
raise NotImplementedError dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
self.gradient[:] = dtheta
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None):
""" """
Returns the derivative of the objective wrt Z, using the chain rule Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables. through the expectation variables.
""" """
raise NotImplementedError return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
""" """
Compute the gradients wrt the parameters of the variational Compute the gradients wrt the parameters of the variational
distruibution q(X), chain-ruling via the expectations of the kernel distruibution q(X), chain-ruling via the expectations of the kernel
""" """
raise NotImplementedError return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs): def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
""" """
@ -172,7 +220,7 @@ class Kern(Parameterized):
def __iadd__(self, other): def __iadd__(self, other):
return self.add(other) return self.add(other)
def add(self, other, name='add'): def add(self, other, name='sum'):
""" """
Add another kernel to this one. Add another kernel to this one.
@ -208,8 +256,6 @@ class Kern(Parameterized):
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
""" """
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..." assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."

View file

@ -1,7 +1,11 @@
''' '''
Created on 11 Mar 2014 Created on 11 Mar 2014
@author: maxz @author: @mzwiessele
This module provides a meta class for the kernels. The meta class is for
slicing the inputs (X, X2) for the kernels, before K (or any other method involving X)
gets calls. The `active_dims` of a kernel decide which dimensions the kernel works on.
''' '''
from ...core.parameterization.parameterized import ParametersChangedMeta from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np import numpy as np
@ -19,20 +23,27 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'gradients_X', _slice_gradients_X) put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X_X2', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'psi0', _slice_psi) put_clean(dct, 'psi0', _slice_psi)
put_clean(dct, 'psi1', _slice_psi) put_clean(dct, 'psi1', _slice_psi)
put_clean(dct, 'psi2', _slice_psi) put_clean(dct, 'psi2', _slice_psi)
put_clean(dct, 'psi2n', _slice_psi)
put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations) put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations) put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations) put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct) return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
class _Slice_wrap(object): class _Slice_wrap(object):
def __init__(self, k, X, X2=None): def __init__(self, k, X, X2=None, ret_shape=None):
self.k = k self.k = k
self.shape = X.shape if ret_shape is None:
self.shape = X.shape
else:
self.shape = ret_shape
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape) assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
if X2 is not None: if X2 is not None:
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape) assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
@ -54,7 +65,10 @@ class _Slice_wrap(object):
def handle_return_array(self, return_val): def handle_return_array(self, return_val):
if self.ret: if self.ret:
ret = np.zeros(self.shape) ret = np.zeros(self.shape)
ret[:, self.k.active_dims] = return_val if len(self.shape) == 2:
ret[:, self.k.active_dims] = return_val
elif len(self.shape) == 3:
ret[:, :, self.k.active_dims] = return_val
return ret return ret
return return_val return return_val
@ -98,6 +112,19 @@ def _slice_gradients_X(f):
return ret return ret
return wrap return wrap
def _slice_gradients_XX(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f): def _slice_gradients_X_diag(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dKdiag, X): def wrap(self, dL_dKdiag, X):
@ -124,7 +151,8 @@ def _slice_update_gradients_expectations(f):
def _slice_gradients_Z_expectations(f): def _slice_gradients_Z_expectations(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
with _Slice_wrap(self, Z, variational_posterior) as s: with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)) ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
return ret return ret
@ -132,7 +160,8 @@ def _slice_gradients_Z_expectations(f):
def _slice_gradients_qX_expectations(f): def _slice_gradients_qX_expectations(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior,
psi0=None, psi1=None, psi2=None, Lpsi0=None, Lpsi1=None, Lpsi2=None):
with _Slice_wrap(self, variational_posterior, Z) as s: with _Slice_wrap(self, variational_posterior, Z) as s:
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X)) ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
r2 = ret[:2] r2 = ret[:2]

View file

@ -17,7 +17,7 @@ class Linear(Kern):
.. math:: .. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i k(x,y) = \sum_{i=1}^{\\text{input_dim}} \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
@ -100,6 +100,12 @@ class Linear(Kern):
#return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) #return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK) return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
def gradients_XX(self, dL_dK, X, X2=None):
if X2 is None:
return 2*np.ones(X.shape)*self.variances
else:
return np.ones(X.shape)*self.variances
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X return 2.*self.variances*dL_dKdiag[:,None]*X
@ -111,26 +117,29 @@ class Linear(Kern):
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[0] return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[1] return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[2] return self.psicomp.psicomputations(self, Z, variational_posterior)[2]
def psi2n(self, Z, variational_posterior):
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[0] dL_dvar = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
if self.ARD: if self.ARD:
self.variances.gradient = dL_dvar self.variances.gradient = dL_dvar
else: else:
self.variances.gradient = dL_dvar.sum() self.variances.gradient = dL_dvar.sum()
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[1] return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[2:] return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
class LinearFull(Kern): class LinearFull(Kern):
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'): def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):

View file

@ -5,6 +5,8 @@ from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np
from ...util.linalg import tdot
from ...util.caching import Cache_this
four_over_tau = 2./np.pi four_over_tau = 2./np.pi
class MLP(Kern): class MLP(Kern):
@ -31,105 +33,116 @@ class MLP(Kern):
""" """
def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., active_dims=None, name='mlp'): def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=1., ARD=False, active_dims=None, name='mlp'):
super(MLP, self).__init__(input_dim, active_dims, name) super(MLP, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp()) self.variance = Param('variance', variance, Logexp())
self.ARD= ARD
if ARD:
wv = np.empty((input_dim,))
wv[:] = weight_variance
weight_variance = wv
self.weight_variance = Param('weight_variance', weight_variance, Logexp()) self.weight_variance = Param('weight_variance', weight_variance, Logexp())
self.bias_variance = Param('bias_variance', bias_variance, Logexp()) self.bias_variance = Param('bias_variance', bias_variance, Logexp())
self.link_parameters(self.variance, self.weight_variance, self.bias_variance) self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
@Cache_this(limit=20, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
self._K_computations(X, X2) if X2 is None:
return self.variance*self._K_dvar X_denom = np.sqrt(self._comp_prod(X)+1.)
X2_denom = X_denom
X2 = X
else:
X_denom = np.sqrt(self._comp_prod(X)+1.)
X2_denom = np.sqrt(self._comp_prod(X2)+1.)
XTX = self._comp_prod(X,X2)/X_denom[:,None]/X2_denom[None,:]
return self.variance*four_over_tau*np.arcsin(XTX)
@Cache_this(limit=20, ignore_args=())
def Kdiag(self, X): def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix for X.""" """Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X) X_prod = self._comp_prod(X)
return self.variance*self._K_diag_dvar return self.variance*four_over_tau*np.arcsin(X_prod/(X_prod+1.))
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
"""Derivative of the covariance with respect to the parameters.""" """Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2) dvar, dw, db = self._comp_grads(dL_dK, X, X2)[:3]
self.variance.gradient = np.sum(self._K_dvar*dL_dK) self.variance.gradient = dvar
self.weight_variance.gradient = dw
denom3 = self._K_denom**3 self.bias_variance.gradient = db
base = four_over_tau*self.variance/np.sqrt(1-self._K_asin_arg*self._K_asin_arg)
base_cov_grad = base*dL_dK
if X2 is None:
vec = np.diag(self._K_inner_prod)
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec+self.bias_variance+1.), vec)
+np.outer(vec,(self.weight_variance*vec+self.bias_variance+1.))))*base_cov_grad).sum()
self.bias_variance.gradient = ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec[None, :]+vec[:, None])*self.weight_variance
+2.*self.bias_variance + 2.))*base_cov_grad).sum()
else:
vec1 = (X*X).sum(1)
vec2 = (X2*X2).sum(1)
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
self.bias_variance.gradient = ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec1[:, None]+vec2[None, :])*self.weight_variance
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self._K_diag_computations(X) dvar, dw, db = self._comp_grads_diag(dL_dKdiag, X)[:3]
self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag) self.variance.gradient = dvar
self.weight_variance.gradient = dw
self.bias_variance.gradient = db
base = four_over_tau*self.variance/np.sqrt(1-self._K_diag_asin_arg*self._K_diag_asin_arg)
base_cov_grad = base*dL_dKdiag/np.square(self._K_diag_denom)
self.weight_variance.gradient = (base_cov_grad*np.square(X).sum(axis=1)).sum()
self.bias_variance.gradient = base_cov_grad.sum()
def gradients_X(self, dL_dK, X, X2): def gradients_X(self, dL_dK, X, X2):
"""Derivative of the covariance matrix with respect to X""" """Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2) return self._comp_grads(dL_dK, X, X2)[3]
arg = self._K_asin_arg
numer = self._K_numer def gradients_X_X2(self, dL_dK, X, X2):
denom = self._K_denom """Derivative of the covariance matrix with respect to X"""
denom3 = denom*denom*denom return self._comp_grads(dL_dK, X, X2)[3:]
if X2 is not None:
vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
return four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
else:
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
"""Gradient of diagonal of covariance with respect to X""" """Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X) return self._comp_grads_diag(dL_dKdiag, X)[3]
arg = self._K_diag_asin_arg
denom = self._K_diag_denom
#numer = self._K_diag_numer
return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
@Cache_this(limit=50, ignore_args=())
def _K_computations(self, X, X2): def _comp_prod(self, X, X2=None):
"""Pre-computations for the covariance matrix (used for computing the covariance and its gradients."""
if X2 is None: if X2 is None:
self._K_inner_prod = np.dot(X,X.T) return (np.square(X)*self.weight_variance).sum(axis=1)+self.bias_variance
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec = np.diag(self._K_numer) + 1.
self._K_denom = np.sqrt(np.outer(vec,vec))
else: else:
self._K_inner_prod = np.dot(X,X2.T) return (X*self.weight_variance).dot(X2.T)+self.bias_variance
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1. @Cache_this(limit=20, ignore_args=(1,))
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1. def _comp_grads(self, dL_dK, X, X2=None):
self._K_denom = np.sqrt(np.outer(vec1,vec2)) var,w,b = self.variance, self.weight_variance, self.bias_variance
self._K_asin_arg = self._K_numer/self._K_denom K = self.K(X, X2)
self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg) dvar = (dL_dK*K).sum()/var
X_prod = self._comp_prod(X)
def _K_diag_computations(self, X): X2_prod = self._comp_prod(X2) if X2 is not None else X_prod
"""Pre-computations concerning the diagonal terms (used for computation of diagonal and its gradients).""" XTX = self._comp_prod(X,X2) if X2 is not None else self._comp_prod(X, X)
self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance common = var*four_over_tau/np.sqrt((X_prod[:,None]+1.)*(X2_prod[None,:]+1.)-np.square(XTX))*dL_dK
self._K_diag_denom = self._K_diag_numer+1. if self.ARD:
self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom if X2 is not None:
self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg) XX2 = X[:,None,:]*X2[None,:,:] if X2 is not None else X[:,None,:]*X[None,:,:]
XX = np.square(X)
X2X2 = np.square(X2)
Q = self.weight_variance.shape[0]
common_XTX = common*XTX
dw = np.dot(common.flat,XX2.reshape(-1,Q)) -( (common_XTX.sum(1)/(X_prod+1.)).T.dot(XX)+(common_XTX.sum(0)/(X2_prod+1.)).dot(X2X2))/2
else:
XX2 = X[:,None,:]*X[None,:,:]
XX = np.square(X)
Q = self.weight_variance.shape[0]
common_XTX = common*XTX
dw = np.dot(common.flat,XX2.reshape(-1,Q)) - ((common_XTX.sum(0)+common_XTX.sum(1))/(X_prod+1.)).dot(XX)/2
else:
dw = (common*((XTX-b)/w-XTX*(((X_prod-b)/(w*(X_prod+1.)))[:,None]+((X2_prod-b)/(w*(X2_prod+1.)))[None,:])/2.)).sum()
db = (common*(1.-XTX*(1./(X_prod[:,None]+1.)+1./(X2_prod[None,:]+1.))/2.)).sum()
if X2 is None:
common = common+common.T
dX = common.dot(X)*w-((common*XTX).sum(axis=1)/(X_prod+1.))[:,None]*X*w
dX2 = dX
else:
dX = common.dot(X2)*w-((common*XTX).sum(axis=1)/(X_prod+1.))[:,None]*X*w
dX2 = common.T.dot(X)*w-((common*XTX).sum(axis=0)/(X2_prod+1.))[:,None]*X2*w
return dvar, dw, db, dX, dX2
@Cache_this(limit=20, ignore_args=(1,))
def _comp_grads_diag(self, dL_dKdiag, X):
var,w,b = self.variance, self.weight_variance, self.bias_variance
K = self.Kdiag(X)
dvar = (dL_dKdiag*K).sum()/var
X_prod = self._comp_prod(X)
common = var*four_over_tau/(np.sqrt(1-np.square(X_prod/(X_prod+1)))*np.square(X_prod+1))*dL_dKdiag
if self.ARD:
XX = np.square(X)
dw = np.dot(common,XX)
else:
dw = (common*(X_prod-b)).sum()/w
db = common.sum()
dX = common[:,None]*X*w*2
return dvar, dw, db, dX

View file

@ -27,8 +27,6 @@ class Prod(CombinationKernel):
:param k1, k2: the kernels to multiply :param k1, k2: the kernels to multiply
:type k1, k2: Kern :type k1, k2: Kern
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean
:rtype: kernel object :rtype: kernel object
""" """

View file

@ -9,18 +9,34 @@ from . import ssrbf_psi_comp
from . import sslinear_psi_comp from . import sslinear_psi_comp
from . import linear_psi_comp from . import linear_psi_comp
class PSICOMP_RBF(Pickleable):
@Cache_this(limit=2, ignore_args=(0,)) class PSICOMP(Pickleable):
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
def psicomputations(self, kern, Z, qX, return_psi2_n=False):
raise NotImplementedError("Abstract method!")
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
raise NotImplementedError("Abstract method!")
def _setup_observers(self):
pass
from .gaussherm import PSICOMP_GH
class PSICOMP_RBF(PSICOMP):
@Cache_this(limit=5, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
variance, lengthscale = kern.variance, kern.lengthscale
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=return_psi2_n)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else: else:
raise ValueError("unknown distriubtion received for psi-statistics") raise ValueError("unknown distriubtion received for psi-statistics")
@Cache_this(limit=2, ignore_args=(0,1,2,3)) @Cache_this(limit=5, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
@ -28,28 +44,26 @@ class PSICOMP_RBF(Pickleable):
else: else:
raise ValueError("unknown distriubtion received for psi-statistics") raise ValueError("unknown distriubtion received for psi-statistics")
def _setup_observers(self): class PSICOMP_Linear(PSICOMP):
pass
class PSICOMP_Linear(Pickleable): @Cache_this(limit=5, ignore_args=(0,))
def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
@Cache_this(limit=2, ignore_args=(0,)) variances = kern.variances
def psicomputations(self, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psicomputations(variance, Z, variational_posterior) return linear_psi_comp.psicomputations(variances, Z, variational_posterior, return_psi2_n=return_psi2_n)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior) return sslinear_psi_comp.psicomputations(variances, Z, variational_posterior)
else: else:
raise ValueError("unknown distriubtion received for psi-statistics") raise ValueError("unknown distriubtion received for psi-statistics")
@Cache_this(limit=2, ignore_args=(0,1,2,3)) @Cache_this(limit=2, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior): def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variances = kern.variances
if isinstance(variational_posterior, variational.NormalPosterior): if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior) return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variances, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior) return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variances, Z, variational_posterior)
else: else:
raise ValueError("unknown distriubtion received for psi-statistics") raise ValueError("unknown distriubtion received for psi-statistics")
def _setup_observers(self):
pass

View file

@ -0,0 +1,100 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
An approximated psi-statistics implementation based on Gauss-Hermite Quadrature
"""
import numpy as np
from ....core.parameterization import Param
from GPy.util.caching import Cache_this
from ....util.linalg import tdot
from . import PSICOMP
class PSICOMP_GH(PSICOMP):
"""
TODO: support Psi2 with shape NxMxM
"""
def __init__(self, degree=5, cache_K=True):
self.degree = degree
self.cache_K = cache_K
self.locs, self.weights = np.polynomial.hermite.hermgauss(degree)
self.locs *= np.sqrt(2.)
self.weights*= 1./np.sqrt(np.pi)
self.Xs = None
def _setup_observers(self):
pass
@Cache_this(limit=10, ignore_args=(0,))
def comp_K(self, Z, qX):
if self.Xs is None or self.Xs.shape != qX.mean.shape:
from ....core.parameterization import ObsAr
self.Xs = ObsAr(np.empty((self.degree,)+qX.mean.shape))
mu, S = qX.mean.values, qX.variance.values
S_sq = np.sqrt(S)
for i in xrange(self.degree):
self.Xs[i] = self.locs[i]*S_sq+mu
return self.Xs
@Cache_this(limit=10, ignore_args=(0,))
def psicomputations(self, kern, Z, qX, return_psi2_n=False):
mu, S = qX.mean.values, qX.variance.values
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
if self.cache_K: Xs = self.comp_K(Z, qX)
else: S_sq = np.sqrt(S)
psi0 = np.zeros((N,))
psi1 = np.zeros((N,M))
psi2 = np.zeros((M,M))
for i in xrange(self.degree):
if self.cache_K:
X = Xs[i]
else:
X = self.locs[i]*S_sq+mu
psi0 += self.weights[i]* kern.Kdiag(X)
Kfu = kern.K(X,Z)
psi1 += self.weights[i]* Kfu
psi2 += self.weights[i]* tdot(Kfu.T)
return psi0, psi1, psi2
@Cache_this(limit=10, ignore_args=(0, 2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
mu, S = qX.mean.values, qX.variance.values
if self.cache_K: Xs = self.comp_K(Z, qX)
S_sq = np.sqrt(S)
dtheta_old = kern.gradient.copy()
dtheta = np.zeros_like(kern.gradient)
if isinstance(Z, Param):
dZ = np.zeros_like(Z.values)
else:
dZ = np.zeros_like(Z)
dmu = np.zeros_like(mu)
dS = np.zeros_like(S)
for i in xrange(self.degree):
if self.cache_K:
X = Xs[i]
else:
X = self.locs[i]*S_sq+mu
dL_dpsi0_i = dL_dpsi0*self.weights[i]
kern.update_gradients_diag(dL_dpsi0_i, X)
dtheta += kern.gradient
dX = kern.gradients_X_diag(dL_dpsi0_i, X)
Kfu = kern.K(X,Z)
dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i]
kern.update_gradients_full(dL_dkfu, X, Z)
dtheta += kern.gradient
dX_i, dZ_i = kern.gradients_X_X2(dL_dkfu, X, Z)
dX += dX_i
dZ += dZ_i
dmu += dX
dS += dX*self.locs[i]/(2.*S_sq)
kern.gradient[:] = dtheta_old
return dtheta, dZ, dmu, dS

View file

@ -8,7 +8,7 @@ The package for the Psi statistics computation of the linear kernel for Bayesian
import numpy as np import numpy as np
from ....util.linalg import tdot from ....util.linalg import tdot
def psicomputations(variance, Z, variational_posterior): def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
""" """
Compute psi-statistics for ss-linear kernel Compute psi-statistics for ss-linear kernel
""" """
@ -21,8 +21,12 @@ def psicomputations(variance, Z, variational_posterior):
S = variational_posterior.variance S = variational_posterior.variance
psi0 = (variance*(np.square(mu)+S)).sum(axis=1) psi0 = (variance*(np.square(mu)+S)).sum(axis=1)
psi1 = np.dot(mu,(variance*Z).T) Zv = variance * Z
psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T) psi1 = np.dot(mu,Zv.T)
if return_psi2_n:
psi2 = psi1[:,:,None] * psi1[:,None,:] + np.dot(S[:,None,:] * Zv[None,:,:], Zv.T)
else:
psi2 = np.dot(S.sum(axis=0) * Zv, Zv.T) + tdot(psi1.T)
return psi0, psi1, psi2 return psi0, psi1, psi2
@ -40,7 +44,7 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati
dL_dmu += 2.*dL_dpsi0_var*mu+np.dot(dL_dpsi1,Z)*variance dL_dmu += 2.*dL_dpsi0_var*mu+np.dot(dL_dpsi1,Z)*variance
dL_dS += dL_dpsi0_var dL_dS += dL_dpsi0_var
dL_dZ += dL_dpsi1_mu*variance dL_dZ += dL_dpsi1_mu*variance
return dL_dvar, dL_dZ, dL_dmu, dL_dS return dL_dvar, dL_dZ, dL_dmu, dL_dS
def _psi2computations(dL_dpsi2, variance, Z, mu, S): def _psi2computations(dL_dpsi2, variance, Z, mu, S):
@ -56,22 +60,42 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S):
# _psi2_dZ MxQ # _psi2_dZ MxQ
# _psi2_dmu NxQ # _psi2_dmu NxQ
# _psi2_dS NxQ # _psi2_dS NxQ
variance2 = np.square(variance) variance2 = np.square(variance)
common_sum = np.dot(mu,(variance*Z).T) common_sum = np.dot(mu,(variance*Z).T)
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0) if len(dL_dpsi2.shape)==2:
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z)) dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
Z2_expect = np.inner(common_sum,dL_dpsi2T) common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
Z1_expect = np.dot(dL_dpsi2T,Z) Z2_expect = np.inner(common_sum,dL_dpsi2T)
Z1_expect = np.dot(dL_dpsi2T,Z)
dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
dL_dmu = common_expect*variance
dL_dS = np.empty(S.shape) dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
dL_dS[:] = Z_expect*variance2
dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu) dL_dmu = common_expect*variance
dL_dS = np.empty(S.shape)
dL_dS[:] = Z_expect*variance2
dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)
else:
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
dL_dpsi2_ = dL_dpsi2.sum(axis=0)
Z_expect = (np.dot(dL_dpsi2.reshape(N*M,M),Z).reshape(N,M,Q)*Z[None,:,:]).sum(axis=1)
dL_dpsi2T = dL_dpsi2_+dL_dpsi2_.T
dL_dpsi2T_ = dL_dpsi2+np.swapaxes(dL_dpsi2, 1, 2)
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
common_expect_ = (common_sum[:,:,None]*np.dot(dL_dpsi2T_.reshape(N*M,M),Z).reshape(N,M,Q)).sum(axis=1)
Z2_expect = (common_sum[:,:,None]*dL_dpsi2T_).sum(axis=1)
Z1_expect = np.dot(dL_dpsi2T_.reshape(N*M,M),Z).reshape(N,M,Q)
dL_dvar = 2.*variance*(S*Z_expect).sum(axis=0)+(common_expect_*mu).sum(axis=0)
dL_dmu = common_expect_*variance
dL_dS = np.empty(S.shape)
dL_dS[:] = variance2* Z_expect
dL_dZ = variance2*(S[:,None,:]*Z1_expect).sum(axis=0)+np.dot(Z2_expect.T,variance*mu)
return dL_dvar, dL_dmu, dL_dS, dL_dZ return dL_dvar, dL_dmu, dL_dS, dL_dZ

View file

@ -5,13 +5,7 @@ The module for psi-statistics for RBF kernel
import numpy as np import numpy as np
from GPy.util.caching import Cacher from GPy.util.caching import Cacher
def psicomputations(variance, lengthscale, Z, variational_posterior): def psicomputations(variance, lengthscale, Z, variational_posterior, return_psi2_n=False):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2 # here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results: # Produced intermediate results:
# _psi1 NxM # _psi1 NxM
@ -21,16 +15,11 @@ def psicomputations(variance, lengthscale, Z, variational_posterior):
psi0 = np.empty(mu.shape[0]) psi0 = np.empty(mu.shape[0])
psi0[:] = variance psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S) psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0) psi2 = _psi2computations(variance, lengthscale, Z, mu, S)
if not return_psi2_n: psi2 = psi2.sum(axis=0)
return psi0, psi1, psi2 return psi0, psi1, psi2
def __psi1computations(variance, lengthscale, Z, mu, S): def __psi1computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 # here are the "statistics" for psi1
# Produced intermediate results: # Produced intermediate results:
# _psi1 NxM # _psi1 NxM
@ -45,26 +34,19 @@ def __psi1computations(variance, lengthscale, Z, mu, S):
return _psi1 return _psi1
def __psi2computations(variance, lengthscale, Z, mu, S): def __psi2computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2 # here are the "statistics" for psi2
# Produced intermediate results: # Produced intermediate results:
# _psi2 MxM # _psi2 MxM
N,M,Q = mu.shape[0], Z.shape[0], mu.shape[1]
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N _psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM _psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2) denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom) _psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+(2*(mu*denom).dot(Z_hat.reshape(M*M,Q).T) - denom.dot(np.square(Z_hat).reshape(M*M,Q).T)).reshape(N,M,M)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2) _psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2 return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
@ -86,13 +68,6 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscal
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S): def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 # here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1 # Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1 # _dL_dvariance 1
@ -118,13 +93,6 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S): def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2 # here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2: # Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1 # _dL_dvariance 1
@ -157,5 +125,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1) _psi1computations = Cacher(__psi1computations, limit=5)
_psi2computations = Cacher(__psi2computations, limit=1) _psi2computations = Cacher(__psi2computations, limit=5)

View file

@ -7,13 +7,6 @@ from ....util.caching import Cache_this
from . import PSICOMP_RBF from . import PSICOMP_RBF
from ....util import gpu_init from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """ gpu_code = """
// define THREADNUM // define THREADNUM
@ -241,7 +234,11 @@ gpu_code = """
class PSICOMP_RBF_GPU(PSICOMP_RBF): class PSICOMP_RBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False): def __init__(self, threadnum=256, blocknum=30, GPU_direct=False):
from pycuda.compiler import SourceModule
from ....util.gpu_init import initGPU
initGPU()
self.GPU_direct = GPU_direct self.GPU_direct = GPU_direct
self.gpuCache = None self.gpuCache = None
@ -264,7 +261,8 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
memo[id(self)] = s memo[id(self)] = s
return s return s
def _initGPUCache(self, N, M, Q): def _initGPUCache(self, N, M, Q):
import pycuda.gpuarray as gpuarray
if self.gpuCache == None: if self.gpuCache == None:
self.gpuCache = { self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'), 'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
@ -320,13 +318,14 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
def get_dimensions(self, Z, variational_posterior): def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1] return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,)) @Cache_this(limit=5, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior): def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
""" """
Z - MxQ Z - MxQ
mu - NxQ mu - NxQ
S - NxQ S - NxQ
""" """
variance, lengthscale = kern.variance, kern.lengthscale
N,M,Q = self.get_dimensions(Z, variational_posterior) N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q) self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance) self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
@ -355,8 +354,10 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
else: else:
return psi0, psi1_gpu.get(), psi2_gpu.get() return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3)) @Cache_this(limit=5, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale
from ....util.linalg_gpu import sum_axis
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior) N,M,Q = self.get_dimensions(Z, variational_posterior)

View file

@ -9,7 +9,7 @@ from ....util.linalg import tdot
import numpy as np import numpy as np
def psicomputations(variance, Z, variational_posterior): def psicomputations(variance, Z, variational_posterior, return_psi2_n=False):
""" """
Compute psi-statistics for ss-linear kernel Compute psi-statistics for ss-linear kernel
""" """

View file

@ -9,7 +9,7 @@ import numpy as np
try: try:
from scipy import weave from scipy import weave
def _psicomputations(variance, lengthscale, Z, variational_posterior): def _psicomputations(variance, lengthscale, Z, variational_posterior):
""" """
Z - MxQ Z - MxQ
@ -23,7 +23,7 @@ try:
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
gamma = variational_posterior.binary_prob gamma = variational_posterior.binary_prob
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale) l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1) log_denom1 = np.log(S/l2+1)
@ -35,13 +35,13 @@ try:
psi0[:] = variance psi0[:] = variance
psi1 = np.empty((N,M)) psi1 = np.empty((N,M))
psi2n = np.empty((N,M,M)) psi2n = np.empty((N,M,M))
from ....util.misc import param_to_array from ....util.misc import param_to_array
S = param_to_array(S) S = param_to_array(S)
mu = param_to_array(mu) mu = param_to_array(mu)
gamma = param_to_array(gamma) gamma = param_to_array(gamma)
Z = param_to_array(Z) Z = param_to_array(Z)
support_code = """ support_code = """
#include <math.h> #include <math.h>
""" """
@ -56,11 +56,11 @@ try:
double lq = l2(q); double lq = l2(q);
double Zm1q = Z(m1,q); double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q); double Zm2q = Z(m2,q);
if(m2==0) { if(m2==0) {
// Compute Psi_1 // Compute Psi_1
double muZ = mu(n,q)-Z(m1,q); double muZ = mu(n,q)-Z(m1,q);
double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.; double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq); double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2)); log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));
@ -69,10 +69,10 @@ try:
double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.; double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q; double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
double dZ = Zm1q - Zm2q; double dZ = Zm1q - Zm2q;
double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q); double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq); double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2)); log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
} }
double exp_psi2_n = exp(log_psi2_n); double exp_psi2_n = exp(log_psi2_n);
psi2n(n,m1,m2) = variance*variance*exp_psi2_n; psi2n(n,m1,m2) = variance*variance*exp_psi2_n;
@ -83,18 +83,18 @@ try:
} }
""" """
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz) weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
psi2 = psi2n.sum(axis=0) psi2 = psi2n.sum(axis=0)
return psi0,psi1,psi2,psi2n return psi0,psi1,psi2,psi2n
from GPy.util.caching import Cacher from GPy.util.caching import Cacher
psicomputations = Cacher(_psicomputations, limit=1) psicomputations = Cacher(_psicomputations, limit=1)
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)
_,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior) _,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
gamma = variational_posterior.binary_prob gamma = variational_posterior.binary_prob
@ -105,7 +105,7 @@ try:
log_gamma = np.log(gamma) log_gamma = np.log(gamma)
log_gamma1 = np.log(1.-gamma) log_gamma1 = np.log(1.-gamma)
variance = float(variance) variance = float(variance)
dvar = np.zeros(1) dvar = np.zeros(1)
dmu = np.zeros((N,Q)) dmu = np.zeros((N,Q))
dS = np.zeros((N,Q)) dS = np.zeros((N,Q))
@ -113,13 +113,13 @@ try:
dl = np.zeros(Q) dl = np.zeros(Q)
dZ = np.zeros((M,Q)) dZ = np.zeros((M,Q))
dvar += np.sum(dL_dpsi0) dvar += np.sum(dL_dpsi0)
from ....util.misc import param_to_array from ....util.misc import param_to_array
S = param_to_array(S) S = param_to_array(S)
mu = param_to_array(mu) mu = param_to_array(mu)
gamma = param_to_array(gamma) gamma = param_to_array(gamma)
Z = param_to_array(Z) Z = param_to_array(Z)
support_code = """ support_code = """
#include <math.h> #include <math.h>
""" """
@ -136,16 +136,16 @@ try:
double Zm2q = Z(m2,q); double Zm2q = Z(m2,q);
double gnq = gamma(n,q); double gnq = gamma(n,q);
double mu_nq = mu(n,q); double mu_nq = mu(n,q);
if(m2==0) { if(m2==0) {
// Compute Psi_1 // Compute Psi_1
double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1); double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
if(q==0) {dvar(0) += lpsi1/variance;} if(q==0) {dvar(0) += lpsi1/variance;}
double Zmu = Zm1q - mu_nq; double Zmu = Zm1q - mu_nq;
double denom = Snq+lq; double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom; double Zmu2_denom = Zmu*Zmu/denom;
double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.); double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq); double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
double d_exp1,d_exp2; double d_exp1,d_exp2;
@ -157,7 +157,7 @@ try:
d_exp2 = 1.; d_exp2 = 1.;
} }
double exp_sum = d_exp1+d_exp2; double exp_sum = d_exp1+d_exp2;
dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum); dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.; dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
@ -167,13 +167,13 @@ try:
// Compute Psi_2 // Compute Psi_2
double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2); double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
if(q==0) {dvar(0) += lpsi2*2/variance;} if(q==0) {dvar(0) += lpsi2*2/variance;}
double dZm1m2 = Zm1q - Zm2q; double dZm1m2 = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q; double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.; double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq; double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom; double muZhat2_denom = muZhat*muZhat/denom;
double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q); double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double exp2 = log_gamma1(n,q) - Z2/(2.*lq); double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
double d_exp1,d_exp2; double d_exp1,d_exp2;
@ -185,23 +185,23 @@ try:
d_exp2 = 1.; d_exp2 = 1.;
} }
double exp_sum = d_exp1+d_exp2; double exp_sum = d_exp1+d_exp2;
dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum; dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum; dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum; dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum; dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum; dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
} }
} }
} }
} }
""" """
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz) weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
dl *= 2.*lengthscale dl *= 2.*lengthscale
if not ARD: if not ARD:
dl = dl.sum() dl = dl.sum()
return dvar, dl, dZ, dmu, dS, dgamma return dvar, dl, dZ, dmu, dS, dgamma
except: except:
@ -219,13 +219,13 @@ except:
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
gamma = variational_posterior.binary_prob gamma = variational_posterior.binary_prob
psi0 = np.empty(mu.shape[0]) psi0 = np.empty(mu.shape[0])
psi0[:] = variance psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma) psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma) psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2 return psi0, psi1, psi2
def _psi1computations(variance, lengthscale, Z, mu, S, gamma): def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
""" """
Z - MxQ Z - MxQ
@ -236,9 +236,9 @@ except:
# here are the "statistics" for psi1 # here are the "statistics" for psi1
# Produced intermediate results: # Produced intermediate results:
# _psi1 NxM # _psi1 NxM
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
# psi1 # psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ _psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ _psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
@ -251,9 +251,9 @@ except:
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM _psi1 = variance * np.exp(_psi1_exp_sum) # NxM
return _psi1 return _psi1
def _psi2computations(variance, lengthscale, Z, mu, S, gamma): def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
""" """
Z - MxQ Z - MxQ
@ -264,14 +264,14 @@ except:
# here are the "statistics" for psi2 # here are the "statistics" for psi2
# Produced intermediate results: # Produced intermediate results:
# _psi2 MxM # _psi2 MxM
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2 # psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom) _psi2_denom_sqrt = np.sqrt(_psi2_denom)
@ -284,28 +284,28 @@ except:
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
return _psi2 return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2 dL_dlengscale = dl_psi1 + dl_psi2
if not ARD: if not ARD:
dL_dlengscale = dL_dlengscale.sum() dL_dlengscale = dL_dlengscale.sum()
dL_dgamma = dgamma_psi1 + dgamma_psi2 dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2 dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2 dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2 dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
""" """
dL_dpsi1 - NxM dL_dpsi1 - NxM
@ -322,9 +322,9 @@ except:
# _dL_dgamma NxQ # _dL_dgamma NxQ
# _dL_dmu NxQ # _dL_dmu NxQ
# _dL_dS NxQ # _dL_dS NxQ
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
# psi1 # psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ _psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
@ -346,9 +346,9 @@ except:
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
""" """
Z - MxQ Z - MxQ
@ -365,14 +365,14 @@ except:
# _dL_dgamma NxQ # _dL_dgamma NxQ
# _dL_dmu NxQ # _dL_dmu NxQ
# _dL_dS NxQ # _dL_dS NxQ
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2 # psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ _psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom) _psi2_denom_sqrt = np.sqrt(_psi2_denom)
@ -384,7 +384,7 @@ except:
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2) _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max)) _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
@ -394,5 +394,5 @@ except:
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

View file

@ -6,14 +6,7 @@ The module for psi-statistics for RBF kernel for Spike-and-Slab GPLVM
import numpy as np import numpy as np
from ....util.caching import Cache_this from ....util.caching import Cache_this
from . import PSICOMP_RBF from . import PSICOMP_RBF
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """ gpu_code = """
// define THREADNUM // define THREADNUM
@ -292,6 +285,11 @@ gpu_code = """
class PSICOMP_SSRBF_GPU(PSICOMP_RBF): class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False): def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
from pycuda.compiler import SourceModule
from ....util.gpu_init import initGPU
initGPU()
self.GPU_direct = GPU_direct self.GPU_direct = GPU_direct
self.gpuCache = None self.gpuCache = None
@ -314,7 +312,8 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
memo[id(self)] = s memo[id(self)] = s
return s return s
def _initGPUCache(self, N, M, Q): def _initGPUCache(self, N, M, Q):
import pycuda.gpuarray as gpuarray
if self.gpuCache == None: if self.gpuCache == None:
self.gpuCache = { self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'), 'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
@ -377,12 +376,13 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1] return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,)) @Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior): def psicomputations(self, kern, Z, variational_posterior, return_psi2_n=False):
""" """
Z - MxQ Z - MxQ
mu - NxQ mu - NxQ
S - NxQ S - NxQ
""" """
variance, lengthscale = kern.variance, kern.lengthscale
N,M,Q = self.get_dimensions(Z, variational_posterior) N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q) self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
@ -409,8 +409,10 @@ class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
else: else:
return psi0, psi1_gpu.get(), psi2_gpu.get() return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3)) @Cache_this(limit=1, ignore_args=(0,2,3,4))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
variance, lengthscale = kern.variance, kern.lengthscale
from ....util.linalg_gpu import sum_axis
ARD = (len(lengthscale)!=1) ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior) N,M,Q = self.get_dimensions(Z, variational_posterior)

View file

@ -31,6 +31,9 @@ class RBF(Stationary):
def dK_dr(self, r): def dK_dr(self, r):
return -r*self.K_of_r(r) return -r*self.K_of_r(r)
def dK2_drdr(self, r):
return (r**2-1)*self.K_of_r(r)
def __getstate__(self): def __getstate__(self):
dc = super(RBF, self).__getstate__() dc = super(RBF, self).__getstate__()
if self.useGPU: if self.useGPU:
@ -50,22 +53,25 @@ class RBF(Stationary):
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0] return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1] return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2] return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=False)[2]
def psi2n(self, Z, variational_posterior):
return self.psicomp.psicomputations(self, Z, variational_posterior, return_psi2_n=True)[2]
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2] dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[:2]
self.variance.gradient = dL_dvar self.variance.gradient = dL_dvar
self.lengthscale.gradient = dL_dlengscale self.lengthscale.gradient = dL_dlengscale
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2] return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:] return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[3:]

52
GPy/kern/_src/spline.py Normal file
View file

@ -0,0 +1,52 @@
# Copyright (c) 2015, Thomas Hornung
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Spline(Kern):
    """
    Linear spline kernel. You need to specify 2 parameters: the variance and c.
    The variance is defined in powers of 10. Thus specifying -2 means 10^-2.
    The parameter c allows to define the stiffness of the spline fit. A very stiff
    spline equals linear regression.

    Inputs are shifted and scaled as (x + 8)/16 before the spline basis terms
    are evaluated.
    # NOTE(review): the hard-coded shift/scale implies inputs are expected in
    # roughly [-8, 8] -- confirm against how callers scale their data.

    See https://www.youtube.com/watch?v=50Vgw11qn0o starting at minute 1:17:28

    Lit: Wahba, 1990
    """

    def __init__(self, input_dim, variance=1., c=1., active_dims=None, name='spline'):
        """
        :param input_dim: number of input dimensions
        :type input_dim: int
        :param variance: signal variance parameter (constrained positive)
        :type variance: float
        :param c: stiffness of the spline fit; a very stiff spline equals linear regression
        :type c: float
        :param active_dims: indices of the input dimensions the kernel acts on
        :param name: name of the kernel instance
        :type name: str
        """
        super(Spline, self).__init__(input_dim, active_dims, name)
        self.variance = Param('variance', variance, Logexp())
        self.c = Param('c', c)
        self.link_parameters(self.variance, self.c)

    def _spline_terms(self, X, X2):
        """Return the three spline basis terms shared by K and update_gradients_full."""
        term1 = (X + 8.) * (X2.T + 8.) / 16.
        term2 = abs((X - X2.T) / 16.) ** 3
        term3 = ((X + 8.) / 16.) ** 3 + ((X2.T + 8.) / 16.) ** 3
        return term1, term2, term3

    def K(self, X, X2=None):
        """Compute the covariance matrix between X (Nx1) and X2 (Mx1)."""
        if X2 is None: X2 = X
        term1, term2, term3 = self._spline_terms(X, X2)
        return (self.variance**2 * (1. + (1. + self.c) * term1 + self.c / 3. * (term2 - term3)))

    def Kdiag(self, X):
        """Compute the diagonal of K(X, X) without forming the full matrix."""
        # On the diagonal (X == X2) term2 vanishes and term1/term3 simplify.
        # Fixed: the original called np.square(X+8., X+8.), which passes the
        # temporary as NumPy's positional `out=` argument -- same value, wrong idiom.
        term1 = np.square(X + 8.) / 16.
        term3 = 2. * ((X + 8.) / 16.) ** 3
        return (self.variance**2 * (1. + (1. + self.c) * term1 - self.c / 3. * term3))[:, 0]

    def update_gradients_full(self, dL_dK, X, X2=None):
        """Gradients of the likelihood w.r.t. variance and c, given dL/dK."""
        if X2 is None: X2 = X
        term1, term2, term3 = self._spline_terms(X, X2)
        # d/dvariance of variance**2 * (...) is 2*variance * (...)
        self.variance.gradient = np.sum(dL_dK * (2*self.variance * (1. + (1.+self.c) * term1 + self.c/3. * ( term2 - term3))))
        self.c.gradient = np.sum(dL_dK * (self.variance**2* (term1 + 1./3.*(term2 - term3))))

    def update_gradients_diag(self, dL_dKdiag, X):
        raise NotImplementedError

    def gradients_X(self, dL_dK, X, X2=None):
        raise NotImplementedError

    def gradients_X_diag(self, dL_dKdiag, X):
        raise NotImplementedError

View file

@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The standard periodic kernel which mentioned in:
[1] Gaussian Processes for Machine Learning, C. E. Rasmussen, C. K. I. Williams.
The MIT Press, 2005.
[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
Neural Networks and Machine Learning, pages 133-165. Springer, 1998.
"""
from .kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
class StdPeriodic(Kern):
    """
    Standart periodic kernel

    .. math::

       k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
       \left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\theta_1` in the formula above
    :type variance: float
    :param wavelength: the vector of wavelengths :math:`\lambda_i`. If None then 1.0 is assumed.
    :type wavelength: array or list of the appropriate size (or float if there is only one wavelength parameter)
    :param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed.
    :type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
    :param ARD1: Auto Relevance Determination with respect to wavelength.
        If equal to "False" one single wavelength parameter :math:`\lambda_i` for
        each dimension is assumed, otherwise there is one lengthscale
        parameter per dimension.
    :type ARD1: Boolean
    :param ARD2: Auto Relevance Determination with respect to lengthscale.
        If equal to "False" one single wavelength parameter :math:`l_i` for
        each dimension is assumed, otherwise there is one lengthscale
        parameter per dimension.
    :type ARD2: Boolean
    :param active_dims: indices of dimensions which are used in the computation of the kernel
    :type active_dims: array or list of the appropriate size
    :param name: Name of the kernel for output
    :type name: String
    :param useGPU: whether of not use GPU
    :type useGPU: Boolean
    """

    def __init__(self, input_dim, variance=1., wavelength=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
        super(StdPeriodic, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
        self.ARD1 = ARD1 # correspond to wavelengths
        self.ARD2 = ARD2 # correspond to lengthscales
        self.name = name

        # Validate/initialize the wavelengths: one shared value when ARD1 is
        # off, one value per input dimension when ARD1 is on.
        if self.ARD1 == False:
            if wavelength is not None:
                wavelength = np.asarray(wavelength)
                assert wavelength.size == 1, "Only one wavelength needed for non-ARD kernel"
            else:
                wavelength = np.ones(1)
        else:
            if wavelength is not None:
                wavelength = np.asarray(wavelength)
                assert wavelength.size == input_dim, "bad number of wavelengths"
            else:
                wavelength = np.ones(input_dim)

        # Same scheme for the lengthscales, governed by ARD2.
        if self.ARD2 == False:
            if lengthscale is not None:
                lengthscale = np.asarray(lengthscale)
                assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
            else:
                lengthscale = np.ones(1)
        else:
            if lengthscale is not None:
                lengthscale = np.asarray(lengthscale)
                assert lengthscale.size == input_dim, "bad number of lengthscales"
            else:
                lengthscale = np.ones(input_dim)

        self.variance = Param('variance', variance, Logexp())
        assert self.variance.size==1, "Variance size must be one"
        self.wavelengths = Param('wavelengths', wavelength, Logexp())
        self.lengthscales = Param('lengthscales', lengthscale, Logexp())

        self.link_parameters(self.variance,  self.wavelengths, self.lengthscales)

    def parameters_changed(self):
        """
        This functions deals as a callback for each optimization iteration.
        If one optimization step was successfull and the parameters
        this callback function will be called to be able to update any
        precomputations for the kernel.
        """
        pass

    def K(self, X, X2=None):
        """Compute the covariance matrix between X and X2."""
        if X2 is None:
            X2 = X

        # base has shape (N, M, Q): pi * pairwise differences / wavelengths.
        base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
        # Sum of squared sines over the input dimensions (axis -1) gives NxM.
        exp_dist = np.exp( -0.5* np.sum( np.square(  np.sin( base ) / self.lengthscales ), axis = -1 ) )

        return self.variance * exp_dist

    def Kdiag(self, X):
        """Compute the diagonal of the covariance matrix associated to X."""
        # The exponent is zero on the diagonal, so k(x, x) = variance.
        ret = np.empty(X.shape[0])
        ret[:] = self.variance
        return ret

    def update_gradients_full(self, dL_dK, X, X2=None):
        """derivative of the covariance matrix with respect to the parameters."""
        if X2 is None:
            X2 = X

        base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths

        sin_base = np.sin( base )
        exp_dist = np.exp( -0.5* np.sum( np.square(  sin_base / self.lengthscales ), axis = -1 ) )

        # Per-dimension partials (NxMxQ) before reduction with dL_dK.
        dwl = self.variance * (1.0/np.square(self.lengthscales)) * sin_base*np.cos(base) * (base / self.wavelengths)

        dl = self.variance * np.square( sin_base) / np.power( self.lengthscales, 3)

        self.variance.gradient = np.sum(exp_dist * dL_dK)

        #target[0] += np.sum( exp_dist * dL_dK)

        if self.ARD1: # different wavelengths
            self.wavelengths.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
        else:  # same wavelengths
            self.wavelengths.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)

        if self.ARD2: # different lengthscales
            self.lengthscales.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
        else: # same lengthscales
            self.lengthscales.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
        """derivative of the diagonal of the covariance matrix with respect to the parameters."""
        # Only the variance appears on the diagonal; the periodic terms cancel.
        self.variance.gradient = np.sum(dL_dKdiag)
        self.wavelengths.gradient = 0
        self.lengthscales.gradient = 0

#    def gradients_X(self, dL_dK, X, X2=None):
#        """derivative of the covariance matrix with respect to X."""
#
#        raise NotImplemented("Periodic kernel: dK_dX not implemented")
#
#    def gradients_X_diag(self, dL_dKdiag, X):
#
#        raise NotImplemented("Periodic kernel: dKdiag_dX not implemented")

View file

@ -24,6 +24,13 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)
def gradients_XX(self, dL_dK, X, X2):
if X2 is None:
X2 = X
return np.zeros((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
def gradients_XX_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape) return np.zeros(Z.shape)
@ -59,6 +66,9 @@ class White(Static):
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64) return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def psi2n(self, Z, variational_posterior):
return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: if X2 is None:
self.variance.gradient = np.trace(dL_dK) self.variance.gradient = np.trace(dL_dK)
@ -92,6 +102,11 @@ class Bias(Static):
ret[:] = self.variance*self.variance*variational_posterior.shape[0] ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret return ret
def psi2n(self, Z, variational_posterior):
ret = np.empty((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance*self.variance
return ret
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()*variational_posterior.shape[0] self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()*variational_posterior.shape[0]
@ -120,6 +135,9 @@ class Fixed(Static):
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64) return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def psi2n(self, Z, variational_posterior):
return np.zeros((1, Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() self.variance.gradient = dL_dpsi0.sum()

View file

@ -15,7 +15,7 @@ from ...util.caching import Cache_this
try: try:
from . import stationary_cython from . import stationary_cython
except ImportError: except ImportError:
print('warning in sationary: failed to import cython module: falling back to numpy') print('warning in stationary: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false') config.set('cython', 'working', 'false')
@ -25,13 +25,16 @@ class Stationary(Kern):
Stationary covariance fucntion depend only on r, where r is defined as Stationary covariance fucntion depend only on r, where r is defined as
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 } .. math::
r(x, x') = \\sqrt{ \\sum_{q=1}^Q (x_q - x'_q)^2 }
The covariance function k(x, x' can then be written k(r). The covariance function k(x, x' can then be written k(r).
In this implementation, r is scaled by the lengthscales parameter(s): In this implementation, r is scaled by the lengthscales parameter(s):
r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }. .. math::
r(x, x') = \\sqrt{ \\sum_{q=1}^Q \\frac{(x_q - x'_q)^2}{\ell_q^2} }.
By default, there's only one lengthscale: seaprate lengthscales for each By default, there's only one lengthscale: seaprate lengthscales for each
dimension can be enables by setting ARD=True. dimension can be enables by setting ARD=True.
@ -39,11 +42,12 @@ class Stationary(Kern):
To implement a stationary covariance function using this class, one need To implement a stationary covariance function using this class, one need
only define the covariance function k(r), and it derivative. only define the covariance function k(r), and it derivative.
... ```
def K_of_r(self, r): def K_of_r(self, r):
return foo return foo
def dK_dr(self, r): def dK_dr(self, r):
return bar return bar
```
The lengthscale(s) and variance parameters are added to the structure automatically. The lengthscale(s) and variance parameters are added to the structure automatically.
@ -77,6 +81,10 @@ class Stationary(Kern):
def dK_dr(self, r): def dK_dr(self, r):
raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
@Cache_this(limit=20, ignore_args=())
def dK2_drdr(self, r):
raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
@Cache_this(limit=5, ignore_args=()) @Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
""" """
@ -89,11 +97,16 @@ class Stationary(Kern):
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
return self.K_of_r(r) return self.K_of_r(r)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=20, ignore_args=())
def dK_dr_via_X(self, X, X2): def dK_dr_via_X(self, X, X2):
#a convenience function, so we can cache dK_dr #a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2)) return self.dK_dr(self._scaled_dist(X, X2))
@Cache_this(limit=3, ignore_args=())
def dK2_drdr_via_X(self, X, X2):
#a convenience function, so we can cache dK_dr
return self.dK2_drdr(self._scaled_dist(X, X2))
def _unscaled_dist(self, X, X2=None): def _unscaled_dist(self, X, X2=None):
""" """
Compute the Euclidean distance between each row of X and X2, or between Compute the Euclidean distance between each row of X and X2, or between
@ -114,12 +127,13 @@ class Stationary(Kern):
r2 = np.clip(r2, 0, np.inf) r2 = np.clip(r2, 0, np.inf)
return np.sqrt(r2) return np.sqrt(r2)
@Cache_this(limit=5, ignore_args=()) @Cache_this(limit=20, ignore_args=())
def _scaled_dist(self, X, X2=None): def _scaled_dist(self, X, X2=None):
""" """
Efficiently compute the scaled distance, r. Efficiently compute the scaled distance, r.
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 ) ..math::
r = \sqrt( \sum_{q=1}^Q (x_q - x'q)^2/l_q^2 )
Note that if thre is only one lengthscale, l comes outside the sum. In Note that if thre is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate this case we compute the unscaled distance first (in a separate
@ -201,6 +215,59 @@ class Stationary(Kern):
else: else:
return self._gradients_X_pure(dL_dK, X, X2) return self._gradients_X_pure(dL_dK, X, X2)
def gradients_XX(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
..math:
\frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
Thus, we return the second derivative in X2.
"""
# The off diagonals in Q are always zero, this should also be true for the Linear kernel...
# According to multivariable chain rule, we can chain the second derivative through r:
# d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
invdist = self._inv_dist(X, X2)
invdist2 = invdist**2
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp1 = dL_dr * invdist
dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
tmp2 = dL_drdr * invdist2
l2 = np.ones(X.shape[1]) * self.lengthscale**2
if X2 is None:
X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance
else:
tmp1[X==X2.T] -= self.variance
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
return grad
def gradients_XX_diag(self, dL_dK, X):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
..math:
\frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ]
"""
return np.ones(X.shape) * self.variance/self.lengthscale**2
def _gradients_X_pure(self, dL_dK, X, X2=None): def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
@ -259,7 +326,7 @@ class OU(Stationary):
.. math:: .. math::
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
""" """
@ -279,7 +346,7 @@ class Matern32(Stationary):
.. math:: .. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^{\\text{input_dim}} \\frac{(x_i-y_i)^2}{\ell_i^2} }
""" """
@ -326,7 +393,7 @@ class Matern52(Stationary):
.. math:: .. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
""" """
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

File diff suppressed because it is too large Load diff

View file

@ -4,14 +4,15 @@
import numpy as np import numpy as np
cimport numpy as np cimport numpy as np
from cython.parallel import prange from cython.parallel import prange
cimport cython
ctypedef np.float64_t DTYPE_t ctypedef np.float64_t DTYPE_t
cdef extern from "stationary_utils.h": cdef extern from "stationary_utils.h":
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad) void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad) nogil
cdef extern from "stationary_utils.h": cdef extern from "stationary_utils.h":
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad) void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad) nogil
def grad_X(int N, int D, int M, def grad_X(int N, int D, int M,
np.ndarray[DTYPE_t, ndim=2] _X, np.ndarray[DTYPE_t, ndim=2] _X,
@ -22,18 +23,18 @@ def grad_X(int N, int D, int M,
cdef double *X2 = <double*> _X2.data cdef double *X2 = <double*> _X2.data
cdef double *tmp = <double*> _tmp.data cdef double *tmp = <double*> _tmp.data
cdef double *grad = <double*> _grad.data cdef double *grad = <double*> _grad.data
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place. with nogil:
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
@cython.cdivision(True)
def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad): def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
cdef int n,d,nd,m cdef int n,d,nd,m
for nd in prange(N*D, nogil=True): for nd in prange(N * D, nogil=True):
n = nd/D n = nd / D
d = nd%D d = nd % D
grad[n,d] = 0.0 grad[n,d] = 0.0
for m in range(M): for m in range(M):
grad[n,d] += tmp[n,m]*(X[n,d]-X2[m,d]) grad[n,d] += tmp[n, m] * (X[n, d] - X2[m, d])
def lengthscale_grads_in_c(int N, int M, int Q, def lengthscale_grads_in_c(int N, int M, int Q,
np.ndarray[DTYPE_t, ndim=2] _tmp, np.ndarray[DTYPE_t, ndim=2] _tmp,
@ -44,16 +45,16 @@ def lengthscale_grads_in_c(int N, int M, int Q,
cdef double *X = <double*> _X.data cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data cdef double *X2 = <double*> _X2.data
cdef double *grad = <double*> _grad.data cdef double *grad = <double*> _grad.data
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place. with nogil:
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad): def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
cdef int q, n, m cdef int q, n, m
cdef double gradq, dist cdef double gradq, dist
for q in range(Q): with nogil:
grad[q] = 0.0 for q in range(Q):
for n in range(N): grad[q] = 0.0
for m in range(M): for n in range(N):
dist = X[n,q] - X2[m,q] for m in range(M):
grad[q] += tmp[n,m]*dist*dist dist = X[n,q] - X2[m,q]
grad[q] += tmp[n, m] * dist * dist

View file

@ -1,3 +1,5 @@
#ifndef __APPLE__
#include <omp.h> #include <omp.h>
#endif
void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad); void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad);
void _lengthscale_grads(int N, int D, int M, double* X, double* X2, double* tmp, double* grad); void _lengthscale_grads(int N, int D, int M, double* X, double* X2, double* tmp, double* grad);

View file

@ -15,7 +15,7 @@ class TruncLinear(Kern):
.. math:: .. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q) k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
@ -54,7 +54,7 @@ class TruncLinear(Kern):
self.delta = Param('delta', delta) self.delta = Param('delta', delta)
self.add_parameter(self.variances) self.add_parameter(self.variances)
self.add_parameter(self.delta) self.add_parameter(self.delta)
@Cache_this(limit=2) @Cache_this(limit=2)
def K(self, X, X2=None): def K(self, X, X2=None):
XX = self.variances*self._product(X, X2) XX = self.variances*self._product(X, X2)
@ -114,7 +114,7 @@ class TruncLinear_inf(Kern):
.. math:: .. math::
k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \simga_q) k(x,y) = \sum_{i=1}^input_dim \sigma^2_i \max(0, x_iy_i - \sigma_q)
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
@ -148,8 +148,8 @@ class TruncLinear_inf(Kern):
self.variances = Param('variances', variances, Logexp()) self.variances = Param('variances', variances, Logexp())
self.add_parameter(self.variances) self.add_parameter(self.variances)
# @Cache_this(limit=2) # @Cache_this(limit=2)
def K(self, X, X2=None): def K(self, X, X2=None):
tmp = self._product(X, X2) tmp = self._product(X, X2)

View file

@ -1,6 +1,6 @@
from .bernoulli import Bernoulli from .bernoulli import Bernoulli
from .exponential import Exponential from .exponential import Exponential
from .gaussian import Gaussian from .gaussian import Gaussian, HeteroscedasticGaussian
from .gamma import Gamma from .gamma import Gamma
from .poisson import Poisson from .poisson import Poisson
from .student_t import StudentT from .student_t import StudentT

View file

@ -85,6 +85,7 @@ class Bernoulli(Likelihood):
gh_x, gh_w = gh_points gh_x, gh_w = gh_points
gh_w = gh_w / np.sqrt(np.pi)
shape = m.shape shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten() m,v,Y = m.flatten(), v.flatten(), Y.flatten()
Ysign = np.where(Y==1,1,-1) Ysign = np.where(Y==1,1,-1)
@ -232,6 +233,17 @@ class Bernoulli(Likelihood):
np.seterr(**state) np.seterr(**state)
return d3logpdf_dlink3 return d3logpdf_dlink3
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
"""
Get the "quantiles" of the binary labels (Bernoulli draws). all the
quantiles must be either 0 or 1, since those are the only values the
draw can take!
"""
p = self.predictive_mean(mu, var)
return [np.asarray(p>(q/100.), dtype=np.int32) for q in quantiles]
def samples(self, gp, Y_metadata=None): def samples(self, gp, Y_metadata=None):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -124,7 +124,7 @@ class Exponential(Likelihood):
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3 return d3lik_dlink3
def samples(self, gp): def samples(self, gp, Y_metadata=None):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -48,6 +48,7 @@ class Gaussian(Likelihood):
def betaY(self,Y,Y_metadata=None): def betaY(self,Y,Y_metadata=None):
#TODO: ~Ricardo this does not live here #TODO: ~Ricardo this does not live here
raise RuntimeError("Please notify the GPy developers, this should not happen")
return Y/self.gaussian_variance(Y_metadata) return Y/self.gaussian_variance(Y_metadata)
def gaussian_variance(self, Y_metadata=None): def gaussian_variance(self, Y_metadata=None):
@ -315,9 +316,44 @@ class Gaussian(Likelihood):
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if not isinstance(self.gp_link, link_functions.Identity):
return super(Gaussian, self).variational_expectations(Y=Y, m=m, v=v, gh_points=gh_points, Y_metadata=Y_metadata)
lik_var = float(self.variance) lik_var = float(self.variance)
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
dF_dmu = (Y - m)/lik_var dF_dmu = (Y - m)/lik_var
dF_dv = np.ones_like(v)*(-0.5/lik_var) dF_dv = np.ones_like(v)*(-0.5/lik_var)
dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2) dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1]) return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
class HeteroscedasticGaussian(Gaussian):
def __init__(self, Y_metadata, gp_link=None, variance=1., name='het_Gauss'):
if gp_link is None:
gp_link = link_functions.Identity()
if not isinstance(gp_link, link_functions.Identity):
print("Warning, Exact inference is not implemeted for non-identity link functions,\
if you are not already, ensure Laplace inference_method is used")
super(HeteroscedasticGaussian, self).__init__(gp_link, np.ones(Y_metadata['output_index'].shape)*variance, name)
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
return dL_dKdiag[Y_metadata['output_index']]
def gaussian_variance(self, Y_metadata=None):
return self.variance[Y_metadata['output_index'].flatten()]
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
_s = self.variance[Y_metadata['output_index'].flatten()]
if full_cov:
if var.ndim == 2:
var += np.eye(var.shape[0])*_s
if var.ndim == 3:
var += np.atleast_3d(np.eye(var.shape[0])*_s)
else:
var += _s
return mu, var
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
_s = self.variance[Y_metadata['output_index'].flatten()]
return [stats.norm.ppf(q/100.)*np.sqrt(var + _s) + mu for q in quantiles]

View file

@ -607,7 +607,7 @@ class Likelihood(Parameterized):
pred_mean = self.predictive_mean(mu, var, Y_metadata=Y_metadata) pred_mean = self.predictive_mean(mu, var, Y_metadata=Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata=Y_metadata) pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata=Y_metadata)
except NotImplementedError: except NotImplementedError:
print "Finding predictive mean and variance via sampling rather than quadrature" print("Finding predictive mean and variance via sampling rather than quadrature")
Nf_samp = 300 Nf_samp = 300
Ny_samp = 1 Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
@ -622,7 +622,7 @@ class Likelihood(Parameterized):
Nf_samp = 300 Nf_samp = 300
Ny_samp = 1 Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
ss_y = self.samples(s, Y_metadata, samples=Ny_samp) ss_y = self.samples(s, Y_metadata)#, samples=Ny_samp)
#ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp) #ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp)
pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles] pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]

View file

@ -2,6 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import scipy
from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
import scipy as sp import scipy as sp
from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
@ -67,7 +68,7 @@ class Probit(GPTransformation):
.. math:: .. math::
g(f) = \\Phi^{-1} (mu) g(f) = \\Phi^{-1} (mu)
""" """
def transf(self,f): def transf(self,f):
return std_norm_cdf(f) return std_norm_cdf(f)
@ -140,7 +141,7 @@ class Log_ex_1(GPTransformation):
""" """
def transf(self,f): def transf(self,f):
return np.log1p(safe_exp(f)) return scipy.special.log1p(safe_exp(f))
def dtransf_df(self,f): def dtransf_df(self,f):
ef = safe_exp(f) ef = safe_exp(f)

View file

@ -145,5 +145,7 @@ class Poisson(Likelihood):
""" """
orig_shape = gp.shape orig_shape = gp.shape
gp = gp.flatten() gp = gp.flatten()
# Ysim = np.random.poisson(self.gp_link.transf(gp), [samples, gp.size]).T
# return Ysim.reshape(orig_shape+(samples,))
Ysim = np.random.poisson(self.gp_link.transf(gp)) Ysim = np.random.poisson(self.gp_link.transf(gp))
return Ysim.reshape(orig_shape) return Ysim.reshape(orig_shape)

View file

@ -9,6 +9,7 @@ from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_miniba
import logging import logging
from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch from GPy.models.sparse_gp_minibatch import SparseGPMiniBatch
from GPy.core.parameterization.param import Param from GPy.core.parameterization.param import Param
from GPy.core.parameterization.observable_array import ObsAr
class BayesianGPLVMMiniBatch(SparseGPMiniBatch): class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
""" """
@ -80,46 +81,10 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
"""Get the gradients of the posterior distribution of X in its specific form.""" """Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient return X.mean.gradient, X.variance.gradient
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kw): def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
posterior, log_marginal_likelihood, grad_dict, current_values, value_indices = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm, subset_indices=subset_indices, **kw) posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2, **kw)
if self.has_uncertain_inputs(): return posterior, log_marginal_likelihood, grad_dict
current_values['meangrad'], current_values['vargrad'] = self.kern.gradients_qX_expectations(
variational_posterior=X,
Z=Z, dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
else:
current_values['Xgrad'] = self.kern.gradients_X(grad_dict['dL_dKnm'], X, Z)
current_values['Xgrad'] += self.kern.gradients_X_diag(grad_dict['dL_dKdiag'], X)
if subset_indices is not None:
value_indices['Xgrad'] = subset_indices['samples']
kl_fctr = self.kl_factr
if self.has_uncertain_inputs():
if self.missing_data:
d = self.output_dim
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)/d
else:
log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(X)
# Subsetting Variational Posterior objects, makes the gradients
# empty. We need them to be 0 though:
X.mean.gradient[:] = 0
X.variance.gradient[:] = 0
self.variational_prior.update_gradients_KL(X)
if self.missing_data:
current_values['meangrad'] += kl_fctr*X.mean.gradient/d
current_values['vargrad'] += kl_fctr*X.variance.gradient/d
else:
current_values['meangrad'] += kl_fctr*X.mean.gradient
current_values['vargrad'] += kl_fctr*X.variance.gradient
if subset_indices is not None:
value_indices['meangrad'] = subset_indices['samples']
value_indices['vargrad'] = subset_indices['samples']
return posterior, log_marginal_likelihood, grad_dict, current_values, value_indices
def _outer_values_update(self, full_values): def _outer_values_update(self, full_values):
""" """
@ -128,22 +93,47 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
""" """
super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values) super(BayesianGPLVMMiniBatch, self)._outer_values_update(full_values)
if self.has_uncertain_inputs(): if self.has_uncertain_inputs():
self.X.mean.gradient = full_values['meangrad'] meangrad_tmp, vargrad_tmp = self.kern.gradients_qX_expectations(
self.X.variance.gradient = full_values['vargrad'] variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'],
psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
self.X.mean.gradient = meangrad_tmp
self.X.variance.gradient = vargrad_tmp
else: else:
self.X.gradient = full_values['Xgrad'] self.X.gradient = self.kern.gradients_X(full_values['dL_dKnm'], self.X, self.Z)
self.X.gradient += self.kern.gradients_X_diag(full_values['dL_dKdiag'], self.X)
def _outer_init_full_values(self): def _outer_init_full_values(self):
if self.has_uncertain_inputs(): return super(BayesianGPLVMMiniBatch, self)._outer_init_full_values()
return dict(meangrad=np.zeros(self.X.mean.shape),
vargrad=np.zeros(self.X.variance.shape))
else:
return dict(Xgrad=np.zeros(self.X.shape))
def parameters_changed(self): def parameters_changed(self):
super(BayesianGPLVMMiniBatch,self).parameters_changed() super(BayesianGPLVMMiniBatch,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch):
return kl_fctr = self.kl_factr
if kl_fctr > 0:
Xgrad = self.X.gradient.copy()
self.X.gradient[:] = 0
self.variational_prior.update_gradients_KL(self.X)
if self.missing_data or not self.stochastics:
self.X.mean.gradient = kl_fctr*self.X.mean.gradient
self.X.variance.gradient = kl_fctr*self.X.variance.gradient
else:
d = self.output_dim
self.X.mean.gradient = kl_fctr*self.X.mean.gradient*self.stochastics.batchsize/d
self.X.variance.gradient = kl_fctr*self.X.variance.gradient*self.stochastics.batchsize/d
self.X.gradient += Xgrad
if self.missing_data or not self.stochastics:
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
elif self.stochastics:
d = self.output_dim
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
self._Xgrad = self.X.gradient.copy()
def plot_latent(self, labels=None, which_indices=None, def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40, resolution=50, ax=None, marker='o', s=40,

View file

@ -1,11 +1,11 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). # Copyright (c) 2015 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..core import GP from ..core import GP
from ..models import GPLVM from . import GPLVM
from ..mappings import * from .. import mappings
class BCGPLVM(GPLVM): class BCGPLVM(GPLVM):
@ -16,33 +16,31 @@ class BCGPLVM(GPLVM):
:type Y: np.ndarray :type Y: np.ndarray
:param input_dim: latent dimensionality :param input_dim: latent dimensionality
:type input_dim: int :type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
:param mapping: mapping for back constraint :param mapping: mapping for back constraint
:type mapping: GPy.core.Mapping object :type mapping: GPy.core.Mapping object
""" """
def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False, mapping=None): def __init__(self, Y, input_dim, kernel=None, mapping=None):
if mapping is None: if mapping is None:
mapping = Kernel(X=Y, output_dim=input_dim) mapping = mappings.MLP(input_dim=Y.shape[1],
output_dim=input_dim,
hidden_dim=10)
else:
assert mapping.input_dim==Y.shape[1], "mapping input dim does not work for Y dimension"
assert mapping.output_dim==input_dim, "mapping output dim does not work for self.input_dim"
GPLVM.__init__(self, Y, input_dim, X=mapping.f(Y), kernel=kernel, name="bcgplvm")
self.unlink_parameter(self.X)
self.mapping = mapping self.mapping = mapping
GPLVM.__init__(self, Y, input_dim, init, X, kernel, normalize_Y) self.link_parameter(self.mapping)
self.X = self.mapping.f(self.likelihood.Y)
def _get_param_names(self): self.X = self.mapping.f(self.Y)
return self.mapping._get_param_names() + GP._get_param_names(self)
def _get_params(self): def parameters_changed(self):
return np.hstack((self.mapping._get_params(), GP._get_params(self))) self.X = self.mapping.f(self.Y)
GP.parameters_changed(self)
Xgradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
self.mapping.update_gradients(Xgradient, self.Y)
def _set_params(self, x):
self.mapping._set_params(x[:self.mapping.num_params])
self.X = self.mapping.f(self.likelihood.Y)
GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self):
dL_df = self.kern.gradients_X(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -16,6 +16,8 @@ class GPHeteroscedasticRegression(GP):
:param X: input observations :param X: input observations
:param Y: observed values :param Y: observed values
:param kernel: a GPy kernel, defaults to rbf :param kernel: a GPy kernel, defaults to rbf
NB: This model does not make inference on the noise outside the training set
""" """
def __init__(self, X, Y, kernel=None, Y_metadata=None): def __init__(self, X, Y, kernel=None, Y_metadata=None):
@ -30,10 +32,7 @@ class GPHeteroscedasticRegression(GP):
kernel = kern.RBF(X.shape[1]) kernel = kern.RBF(X.shape[1])
#Likelihood #Likelihood
#likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in range(Ny)] likelihood = likelihoods.HeteroscedasticGaussian(Y_metadata)
noise_terms = np.unique(Y_metadata['output_index'].flatten())
likelihoods_list = [likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for j in noise_terms]
likelihood = likelihoods.MixedNoise(likelihoods_list=likelihoods_list)
super(GPHeteroscedasticRegression, self).__init__(X,Y,kernel,likelihood, Y_metadata=Y_metadata) super(GPHeteroscedasticRegression, self).__init__(X,Y,kernel,likelihood, Y_metadata=Y_metadata)

View file

@ -1,5 +1,5 @@
# Copyright (c) 2014, James Hensman, Alan Saul # Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General public License, see LICENSE.txt # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..core.model import Model from ..core.model import Model

View file

@ -26,12 +26,12 @@ class GPRegression(GP):
""" """
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.): def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1., mean_function=None):
if kernel is None: if kernel is None:
kernel = kern.RBF(X.shape[1]) kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian(variance=noise_var) likelihood = likelihoods.Gaussian(variance=noise_var)
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer) super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)

View file

@ -1,20 +1,17 @@
# Copyright (c) 2014, James Hensman, Alan Saul # Copyright (c) 2014, James Hensman, Alan Saul
# Distributed under the terms of the GNU General public License, see LICENSE.txt # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from scipy import stats from ..core import GP
from scipy.special import erf
from ..core.model import Model
from ..core.parameterization import ObsAr from ..core.parameterization import ObsAr
from .. import kern from .. import kern
from ..core.parameterization.param import Param from ..core.parameterization.param import Param
from ..util.linalg import pdinv from ..inference.latent_function_inference import VarGauss
from ..likelihoods import Gaussian
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model): class GPVariationalGaussianApproximation(GP):
""" """
The Variational Gaussian Approximation revisited The Variational Gaussian Approximation revisited
@ -26,70 +23,14 @@ class GPVariationalGaussianApproximation(Model):
pages = {786--792}, pages = {786--792},
} }
""" """
def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None): def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):
Model.__init__(self,'Variational GP')
if likelihood is None:
likelihood = Gaussian()
# accept the construction arguments
self.X = ObsAr(X)
self.Y = Y
self.num_data, self.input_dim = self.X.shape
self.Y_metadata = Y_metadata
self.kern = kernel num_data = Y.shape[0]
self.likelihood = likelihood self.alpha = Param('alpha', np.zeros((num_data,1))) # only one latent fn for now.
self.link_parameter(self.kern) self.beta = Param('beta', np.ones(num_data))
self.link_parameter(self.likelihood)
inf = VarGauss(self.alpha, self.beta)
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)
self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(self.num_data))
self.link_parameter(self.alpha) self.link_parameter(self.alpha)
self.link_parameter(self.beta) self.link_parameter(self.beta)
def log_likelihood(self):
return self._log_lik
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
self._log_lik = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
def _raw_predict(self, Xnew):
"""
Predict the function(s) at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
"""
Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
Kux = self.kern.K(self.X, Xnew)
mu = np.dot(Kux.T, self.alpha)
WiKux = np.dot(Wi, Kux)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
return mu, var.reshape(-1,1)

View file

@ -36,6 +36,7 @@ class GPLVM(GP):
likelihood = Gaussian() likelihood = Gaussian()
super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM') super(GPLVM, self).__init__(X, Y, kernel, likelihood, name='GPLVM')
self.X = Param('latent_mean', X) self.X = Param('latent_mean', X)
self.link_parameter(self.X, index=0) self.link_parameter(self.X, index=0)
@ -43,27 +44,30 @@ class GPLVM(GP):
super(GPLVM, self).parameters_changed() super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None) self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
def jacobian(self,X): #def jacobian(self,X):
J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) # J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim): # for i in range(self.output_dim):
J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X) # J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
return J # return J
def magnification(self,X): #def magnification(self,X):
target=np.zeros(X.shape[0]) # target=np.zeros(X.shape[0])
#J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) # #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
J = self.jacobian(X) ## J = self.jacobian(X)
for i in range(X.shape[0]): # for i in range(X.shape[0]):
target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:])))) # target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
return target # return target
def plot(self): def plot(self):
assert self.likelihood.Y.shape[1] == 2 assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent"
pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable from matplotlib import pyplot as plt
plt.scatter(self.Y[:, 0],
self.Y[:, 1],
40, self.X[:, 0].copy(),
linewidth=0, cmap=plt.cm.jet)
Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None] Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
mu, _ = self.predict(Xnew) mu, _ = self.predict(Xnew)
import pylab as pb plt.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
def plot_latent(self, labels=None, which_indices=None, def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40, resolution=50, ax=None, marker='o', s=40,
@ -78,6 +82,3 @@ class GPLVM(GP):
resolution, ax, marker, s, resolution, ax, marker, s,
fignum, False, legend, fignum, False, legend,
plot_limits, aspect, updates, **kwargs) plot_limits, aspect, updates, **kwargs)
def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs)

View file

@ -251,7 +251,7 @@ class HessianChecker(GradientChecker):
print(grad_string) print(grad_string)
if plot: if plot:
import pylab as pb from matplotlib import pyplot as pb
fig, axes = pb.subplots(2, 2) fig, axes = pb.subplots(2, 2)
max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess))) max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess))) min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))

View file

@ -170,20 +170,19 @@ class MRD(BayesianGPLVMMiniBatch):
self._log_marginal_likelihood += b._log_marginal_likelihood self._log_marginal_likelihood += b._log_marginal_likelihood
self.logger.info('working on im <{}>'.format(hex(id(i)))) self.logger.info('working on im <{}>'.format(hex(id(i))))
self.Z.gradient[:] += b.full_values['Zgrad'] self.Z.gradient[:] += b.Z.gradient#full_values['Zgrad']
grad_dict = b.full_values #grad_dict = b.full_values
if self.has_uncertain_inputs(): if self.has_uncertain_inputs():
self.X.mean.gradient += grad_dict['meangrad'] self.X.gradient += b._Xgrad
self.X.variance.gradient += grad_dict['vargrad']
else: else:
self.X.gradient += grad_dict['Xgrad'] self.X.gradient += b._Xgrad
if self.has_uncertain_inputs(): #if self.has_uncertain_inputs():
# update for the KL divergence # # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) # self.variational_prior.update_gradients_KL(self.X)
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) # self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
pass # pass
def log_likelihood(self): def log_likelihood(self):
return self._log_marginal_likelihood return self._log_marginal_likelihood

View file

@ -44,7 +44,7 @@ class SparseGPMiniBatch(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False, name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1): missing_data=False, stochastic=False, batchsize=1):
# pick a sensible inference method # pick a sensible inference method
if inference_method is None: if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian): if isinstance(likelihood, likelihoods.Gaussian):
@ -63,10 +63,10 @@ class SparseGPMiniBatch(SparseGP):
if stochastic and missing_data: if stochastic and missing_data:
self.missing_data = True self.missing_data = True
self.stochastics = SparseGPStochastics(self, batchsize) self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
elif stochastic and not missing_data: elif stochastic and not missing_data:
self.missing_data = False self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize) self.stochastics = SparseGPStochastics(self, batchsize, self.missing_data)
elif missing_data: elif missing_data:
self.missing_data = True self.missing_data = True
self.stochastics = SparseGPMissing(self) self.stochastics = SparseGPMissing(self)
@ -80,7 +80,7 @@ class SparseGPMiniBatch(SparseGP):
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, subset_indices=None, **kwargs): def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kwargs):
""" """
This is the standard part, which usually belongs in parameters_changed. This is the standard part, which usually belongs in parameters_changed.
@ -99,47 +99,13 @@ class SparseGPMiniBatch(SparseGP):
like them into this dictionary for inner use of the indices inside the like them into this dictionary for inner use of the indices inside the
algorithm. algorithm.
""" """
try: if psi2 is None:
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=None, **kwargs) psi2_sum_n = None
except:
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata)
current_values = {}
likelihood.update_gradients(grad_dict['dL_dthetaL'])
current_values['likgrad'] = likelihood.gradient.copy()
if subset_indices is None:
subset_indices = {}
if isinstance(X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = grad_dict['dL_dKmm']
kern.update_gradients_full(dL_dKmm, Z, None)
current_values['kerngrad'] = kern.gradient.copy()
kern.update_gradients_expectations(variational_posterior=X,
Z=Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=grad_dict['dL_dpsi1'],
dL_dpsi2=grad_dict['dL_dpsi2'])
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
current_values['Zgrad'] = kern.gradients_X(dL_dKmm, Z)
current_values['Zgrad'] += kern.gradients_Z_expectations(
grad_dict['dL_dpsi0'],
grad_dict['dL_dpsi1'],
grad_dict['dL_dpsi2'],
Z=Z,
variational_posterior=X)
else: else:
#gradients wrt kernel psi2_sum_n = psi2.sum(axis=0)
kern.update_gradients_diag(grad_dict['dL_dKdiag'], X) posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
current_values['kerngrad'] = kern.gradient.copy() dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
kern.update_gradients_full(grad_dict['dL_dKnm'], X, Z) return posterior, log_marginal_likelihood, grad_dict
current_values['kerngrad'] += kern.gradient
kern.update_gradients_full(grad_dict['dL_dKmm'], Z, None)
current_values['kerngrad'] += kern.gradient
#gradients wrt Z
current_values['Zgrad'] = kern.gradients_X(grad_dict['dL_dKmm'], Z)
current_values['Zgrad'] += kern.gradients_X(grad_dict['dL_dKnm'].T, Z, X)
return posterior, log_marginal_likelihood, grad_dict, current_values, subset_indices
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
""" """
@ -173,7 +139,10 @@ class SparseGPMiniBatch(SparseGP):
else: else:
index = slice(None) index = slice(None)
if key in full_values: if key in full_values:
full_values[key][index] += current_values[key] try:
full_values[key][index] += current_values[key]
except:
full_values[key] += current_values[key]
else: else:
full_values[key] = current_values[key] full_values[key] = current_values[key]
@ -192,9 +161,41 @@ class SparseGPMiniBatch(SparseGP):
Here you put the values, which were collected before in the right places. Here you put the values, which were collected before in the right places.
E.g. set the gradients of parameters, etc. E.g. set the gradients of parameters, etc.
""" """
self.likelihood.gradient = full_values['likgrad'] if self.has_uncertain_inputs():
self.kern.gradient = full_values['kerngrad'] #gradients wrt kernel
self.Z.gradient = full_values['Zgrad'] dL_dKmm = full_values['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
kgrad = self.kern.gradient.copy()
self.kern.update_gradients_expectations(
variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'])
self.kern.gradient += kgrad
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
variational_posterior=self.X,
Z=self.Z, dL_dpsi0=full_values['dL_dpsi0'],
dL_dpsi1=full_values['dL_dpsi1'],
dL_dpsi2=full_values['dL_dpsi2'])
else:
#gradients wrt kernel
self.kern.update_gradients_diag(full_values['dL_dKdiag'], self.X)
kgrad = self.kern.gradient.copy()
self.kern.update_gradients_full(full_values['dL_dKnm'], self.X, self.Z)
kgrad += self.kern.gradient
self.kern.update_gradients_full(full_values['dL_dKmm'], self.Z, None)
self.kern.gradient += kgrad
#kgrad += self.kern.gradient
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(full_values['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(full_values['dL_dKnm'].T, self.Z, self.X)
self.likelihood.update_gradients(full_values['dL_dthetaL'])
def _outer_init_full_values(self): def _outer_init_full_values(self):
""" """
@ -209,7 +210,15 @@ class SparseGPMiniBatch(SparseGP):
to initialize the gradients for the mean and the variance in order to to initialize the gradients for the mean and the variance in order to
have the full gradient for indexing) have the full gradient for indexing)
""" """
return {} retd = dict(dL_dKmm=np.zeros((self.Z.shape[0], self.Z.shape[0])))
if self.has_uncertain_inputs():
retd.update(dict(dL_dpsi0=np.zeros(self.X.shape[0]),
dL_dpsi1=np.zeros((self.X.shape[0], self.Z.shape[0])),
dL_dpsi2=np.zeros((self.X.shape[0], self.Z.shape[0], self.Z.shape[0]))))
else:
retd.update({'dL_dKdiag': np.zeros(self.X.shape[0]),
'dL_dKnm': np.zeros((self.X.shape[0], self.Z.shape[0]))})
return retd
def _outer_loop_for_missing_data(self): def _outer_loop_for_missing_data(self):
Lm = None Lm = None
@ -231,28 +240,36 @@ class SparseGPMiniBatch(SparseGP):
print(message, end=' ') print(message, end=' ')
for d, ninan in self.stochastics.d: for d, ninan in self.stochastics.d:
if not self.stochastics: if not self.stochastics:
print(' '*(len(message)) + '\r', end=' ') print(' '*(len(message)) + '\r', end=' ')
message = m_f(d) message = m_f(d)
print(message, end=' ') print(message, end=' ')
posterior, log_marginal_likelihood, \ psi0ni = self.psi0[ninan]
grad_dict, current_values, value_indices = self._inner_parameters_changed( psi1ni = self.psi1[ninan]
if self.has_uncertain_inputs():
psi2ni = self.psi2[ninan]
value_indices = dict(outputs=d, samples=ninan, dL_dpsi0=ninan, dL_dpsi1=ninan, dL_dpsi2=ninan)
else:
psi2ni = None
value_indices = dict(outputs=d, samples=ninan, dL_dKdiag=ninan, dL_dKnm=ninan)
posterior, log_marginal_likelihood, grad_dict = self._inner_parameters_changed(
self.kern, self.X[ninan], self.kern, self.X[ninan],
self.Z, self.likelihood, self.Z, self.likelihood,
self.Y_normalized[ninan][:, d], self.Y_metadata, self.Y_normalized[ninan][:, d], self.Y_metadata,
Lm, dL_dKmm, Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan)) psi0=psi0ni, psi1=psi1ni, psi2=psi2ni)
self._inner_take_over_or_update(self.full_values, current_values, value_indices) # Fill out the full values by adding in the apporpriate grad_dict
self._inner_values_update(current_values) # values
self._inner_take_over_or_update(self.full_values, grad_dict, value_indices)
self._inner_values_update(grad_dict) # What is this for? -> MRD
Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None] woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
woodbury_vector[:, d] = posterior.woodbury_vector woodbury_vector[:, d] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics: if not self.stochastics:
print('') print('')
@ -260,10 +277,10 @@ class SparseGPMiniBatch(SparseGP):
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol) K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
self._outer_values_update(self.full_values) self._outer_values_update(self.full_values)
if self.has_uncertain_inputs():
self.kern.return_psi2_n = False
def _outer_loop_without_missing_data(self): def _outer_loop_without_missing_data(self):
self._log_marginal_likelihood = 0
if self.posterior is None: if self.posterior is None:
woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim)) woodbury_inv = np.zeros((self.num_inducing, self.num_inducing, self.output_dim))
woodbury_vector = np.zeros((self.num_inducing, self.output_dim)) woodbury_vector = np.zeros((self.num_inducing, self.output_dim))
@ -271,17 +288,16 @@ class SparseGPMiniBatch(SparseGP):
woodbury_inv = self.posterior._woodbury_inv woodbury_inv = self.posterior._woodbury_inv
woodbury_vector = self.posterior._woodbury_vector woodbury_vector = self.posterior._woodbury_vector
d = self.stochastics.d d = self.stochastics.d[0][0]
posterior, log_marginal_likelihood, \ posterior, log_marginal_likelihood, grad_dict= self._inner_parameters_changed(
grad_dict, self.full_values, _ = self._inner_parameters_changed(
self.kern, self.X, self.kern, self.X,
self.Z, self.likelihood, self.Z, self.likelihood,
self.Y_normalized[:, d], self.Y_metadata) self.Y_normalized[:, d], self.Y_metadata)
self.grad_dict = grad_dict self.grad_dict = grad_dict
self._log_marginal_likelihood += log_marginal_likelihood self._log_marginal_likelihood = log_marginal_likelihood
self._outer_values_update(self.full_values) self._outer_values_update(self.grad_dict)
woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None] woodbury_inv[:, :, d] = posterior.woodbury_inv[:, :, None]
woodbury_vector[:, d] = posterior.woodbury_vector woodbury_vector[:, d] = posterior.woodbury_vector
@ -290,10 +306,23 @@ class SparseGPMiniBatch(SparseGP):
K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol) K=posterior._K, mean=None, cov=None, K_chol=posterior.K_chol)
def parameters_changed(self): def parameters_changed(self):
#Compute the psi statistics for N once, but don't sum out N in psi2
if self.has_uncertain_inputs():
#psi0 = ObsAr(self.kern.psi0(self.Z, self.X))
#psi1 = ObsAr(self.kern.psi1(self.Z, self.X))
#psi2 = ObsAr(self.kern.psi2(self.Z, self.X))
self.psi0 = self.kern.psi0(self.Z, self.X)
self.psi1 = self.kern.psi1(self.Z, self.X)
self.psi2 = self.kern.psi2n(self.Z, self.X)
else:
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.X, self.Z)
self.psi2 = None
if self.missing_data: if self.missing_data:
self._outer_loop_for_missing_data() self._outer_loop_for_missing_data()
elif self.stochastics: elif self.stochastics:
self._outer_loop_without_missing_data() self._outer_loop_without_missing_data()
else: else:
self.posterior, self._log_marginal_likelihood, self.grad_dict, self.full_values, _ = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
self._outer_values_update(self.full_values) self._outer_values_update(self.grad_dict)

View file

@ -4,4 +4,8 @@
try: try:
from . import matplot_dep from . import matplot_dep
except (ImportError, NameError): except (ImportError, NameError):
print('Fail to load GPy.plotting.matplot_dep.') # Matplotlib not available
import warnings
warnings.warn(ImportWarning("Matplotlib not available, install newest version of Matplotlib for plotting"))
#sys.modules['matplotlib'] =
#sys.modules[__name__+'.matplot_dep'] = ImportWarning("Matplotlib not available, install newest version of Matplotlib for plotting")

View file

@ -3,7 +3,7 @@
import matplotlib as mpl import matplotlib as mpl
import pylab as pb from matplotlib import pyplot as pb
import sys import sys
#sys.path.append('/home/james/mlprojects/sitran_cluster/') #sys.path.append('/home/james/mlprojects/sitran_cluster/')
#from switch_pylab_backend import * #from switch_pylab_backend import *
@ -159,7 +159,7 @@ cdict_Alu = {'red' :((0./5,colorsRGB['Aluminium1'][0]/256.,colorsRGB['Aluminium1
# cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256) # cmap_BGR = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_BGR,256)
# cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256) # cmap_RB = mpl.colors.LinearSegmentedColormap('TangoRedBlue',cdict_RB,256)
if __name__=='__main__': if __name__=='__main__':
import pylab as pb from matplotlib import pyplot as pb
pb.figure() pb.figure()
pb.pcolor(pb.rand(10,10),cmap=cmap_RB) pb.pcolor(pb.rand(10,10),cmap=cmap_RB)
pb.colorbar() pb.colorbar()

View file

@ -3,8 +3,8 @@
try: try:
import Tango #import Tango
import pylab as pb from matplotlib import pyplot as pb
except: except:
pass pass
import numpy as np import numpy as np
@ -17,11 +17,11 @@ def ax_default(fignum, ax):
fig = ax.figure fig = ax.figure
return fig, ax return fig, ax
def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw): def meanplot(x, mu, color='#3300FF', ax=None, fignum=None, linewidth=2,**kw):
_, axes = ax_default(fignum, ax) _, axes = ax_default(fignum, ax)
return axes.plot(x,mu,color=color,linewidth=linewidth,**kw) return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
def gpplot(x, mu, lower, upper, edgecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue'], ax=None, fignum=None, **kwargs): def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, fignum=None, **kwargs):
_, axes = ax_default(fignum, ax) _, axes = ax_default(fignum, ax)
mu = mu.flatten() mu = mu.flatten()
@ -47,6 +47,32 @@ def gpplot(x, mu, lower, upper, edgecol=Tango.colorsHex['darkBlue'], fillcol=Tan
return plots return plots
def gperrors(x, mu, lower, upper, edgecol=None, ax=None, fignum=None, **kwargs):
_, axes = ax_default(fignum, ax)
mu = mu.flatten()
x = x.flatten()
lower = lower.flatten()
upper = upper.flatten()
plots = []
if edgecol is None:
edgecol='#3300FF'
if not 'alpha' in kwargs.keys():
kwargs['alpha'] = 1.
if not 'lw' in kwargs.keys():
kwargs['lw'] = 1.
plots.append(axes.errorbar(x,mu,yerr=np.vstack([mu-lower,upper-mu]),color=edgecol,**kwargs))
plots[-1][0].remove()
return plots
def removeRightTicks(ax=None): def removeRightTicks(ax=None):
ax = ax or pb.gca() ax = ax or pb.gca()
for i, line in enumerate(ax.get_yticklines()): for i, line in enumerate(ax.get_yticklines()):

View file

@ -9,7 +9,8 @@ import itertools
try: try:
import Tango import Tango
from matplotlib.cm import get_cmap from matplotlib.cm import get_cmap
import pylab as pb from matplotlib import pyplot as pb
from matplotlib import cm
except: except:
pass pass
@ -114,7 +115,7 @@ def plot_latent(model, labels=None, which_indices=None,
# create a function which computes the shading of latent space according to the output variance # create a function which computes the shading of latent space according to the output variance
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) Xtest_full = np.zeros((x.shape[0], X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x Xtest_full[:, [input_1, input_2]] = x
_, var = model.predict(Xtest_full, **predict_kwargs) _, var = model.predict(Xtest_full, **predict_kwargs)
var = var[:, :1] var = var[:, :1]
@ -137,7 +138,7 @@ def plot_latent(model, labels=None, which_indices=None,
view = ImshowController(ax, plot_function, view = ImshowController(ax, plot_function,
(xmin, ymin, xmax, ymax), (xmin, ymin, xmax, ymax),
resolution, aspect=aspect, interpolation='bilinear', resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.binary, **imshow_kwargs) cmap=cm.binary, **imshow_kwargs)
# make sure labels are in order of input: # make sure labels are in order of input:
labels = np.asarray(labels) labels = np.asarray(labels)
@ -192,17 +193,18 @@ def plot_latent(model, labels=None, which_indices=None,
if updates: if updates:
try: try:
ax.figure.canvas.show() fig.canvas.show()
except Exception as e: except Exception as e:
print("Could not invoke show: {}".format(e)) print("Could not invoke show: {}".format(e))
raw_input('Enter to continue') #raw_input('Enter to continue')
view.deactivate() return view
return ax return ax
def plot_magnification(model, labels=None, which_indices=None, def plot_magnification(model, labels=None, which_indices=None,
resolution=60, ax=None, marker='o', s=40, resolution=60, ax=None, marker='o', s=40,
fignum=None, plot_inducing=False, legend=True, fignum=None, plot_inducing=False, legend=True,
aspect='auto', updates=False): plot_limits=None,
aspect='auto', updates=False, mean=True, covariance=True, kern=None):
""" """
:param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
:param resolution: the resolution of the grid on which to evaluate the predictive variance :param resolution: the resolution of the grid on which to evaluate the predictive variance
@ -210,6 +212,8 @@ def plot_magnification(model, labels=None, which_indices=None,
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = pb.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
else:
fig = ax.figure
Tango.reset() Tango.reset()
if labels is None: if labels is None:
@ -217,19 +221,90 @@ def plot_magnification(model, labels=None, which_indices=None,
input_1, input_2 = most_significant_input_dimensions(model, which_indices) input_1, input_2 = most_significant_input_dimensions(model, which_indices)
# first, plot the output variance as a function of the latent space #fethch the data points X that we'd like to plot
Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution) X = model.X
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) if isinstance(X, VariationalPosterior):
X = X.mean
else:
X = X
if X.shape[0] > 1000:
print("Warning: subsampling X, as it has more samples then 1000. X.shape={!s}".format(X.shape))
subsample = np.random.choice(X.shape[0], size=1000, replace=False)
X = X[subsample]
labels = labels[subsample]
#=======================================================================
# <<<WORK IN PROGRESS>>>
# <<<DO NOT DELETE>>>
# plt.close('all')
# fig, ax = plt.subplots(1,1)
# from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions
# import matplotlib.patches as mpatches
# i1, i2 = most_significant_input_dimensions(m, None)
# xmin, xmax = 100, -100
# ymin, ymax = 100, -100
# legend_handles = []
#
# X = m.X.mean[:, [i1, i2]]
# X = m.X.variance[:, [i1, i2]]
#
# xmin = X[:,0].min(); xmax = X[:,0].max()
# ymin = X[:,1].min(); ymax = X[:,1].max()
# range_ = [[xmin, xmax], [ymin, ymax]]
# ul = np.unique(labels)
#
# for i, l in enumerate(ul):
# #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])],
# # green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])],
# # blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])],
# # alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)])
# #cmap = LinearSegmentedColormap('{}'.format(l), cdict)
# cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255)
# cmap._init()
# #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
# alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
# cmap._lut[:, -1] = alphas
# print l
# x, y = X[labels==l].T
#
# heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_)
# #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
#
# im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l))
# legend_handles.append(mpatches.Patch(color=colors[i], label=l))
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
# plt.legend(legend_handles, [l.get_label() for l in legend_handles])
# plt.draw()
# plt.show()
#=======================================================================
#Create an IMshow controller that can re-plot the latent space shading at a good resolution
if plot_limits is None:
xmin, ymin = X[:, [input_1, input_2]].min(0)
xmax, ymax = X[:, [input_1, input_2]].max(0)
x_r, y_r = xmax-xmin, ymax-ymin
xmin -= .1*x_r
xmax += .1*x_r
ymin -= .1*y_r
ymax += .1*y_r
else:
try:
xmin, xmax, ymin, ymax = plot_limits
except (TypeError, ValueError) as e:
raise e.__class__("Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits))
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x Xtest_full[:, [input_1, input_2]] = x
mf=model.magnification(Xtest_full) mf = model.predict_magnification(Xtest_full, kern=kern, mean=mean, covariance=covariance)
return mf return mf
view = ImshowController(ax, plot_function, view = ImshowController(ax, plot_function,
tuple(model.X.min(0)[:, [input_1, input_2]]) + tuple(model.X.max(0)[:, [input_1, input_2]]), (xmin, ymin, xmax, ymax),
resolution, aspect=aspect, interpolation='bilinear', resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.gray) cmap=cm.get_cmap('Greys'))
# make sure labels are in order of input: # make sure labels are in order of input:
ulabels = [] ulabels = []
@ -245,17 +320,17 @@ def plot_magnification(model, labels=None, which_indices=None,
elif type(ul) is np.int64: elif type(ul) is np.int64:
this_label = 'class %i' % ul this_label = 'class %i' % ul
else: else:
this_label = 'class %i' % i this_label = unicode(ul)
m = marker.next() m = marker.next()
index = np.nonzero(labels == ul)[0] index = np.nonzero(labels == ul)[0]
if model.input_dim == 1: if model.input_dim == 1:
x = model.X[index, input_1] x = X[index, input_1]
y = np.zeros(index.size) y = np.zeros(index.size)
else: else:
x = model.X[index, input_1] x = X[index, input_1]
y = model.X[index, input_2] y = X[index, input_2]
ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label) ax.scatter(x, y, marker=m, s=s, c=Tango.nextMedium(), label=this_label, linewidth=.2, edgecolor='k', alpha=.9)
ax.set_xlabel('latent dimension %i' % input_1) ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2) ax.set_ylabel('latent dimension %i' % input_2)
@ -263,19 +338,29 @@ def plot_magnification(model, labels=None, which_indices=None,
if not np.all(labels == 1.) and legend: if not np.all(labels == 1.) and legend:
ax.legend(loc=0, numpoints=1) ax.legend(loc=0, numpoints=1)
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim((xmin, xmax))
ax.set_ylim(xmin[1], xmax[1]) ax.set_ylim((ymin, ymax))
ax.grid(b=False) # remove the grid if present, it doesn't look good
ax.set_aspect('auto') # set a nice aspect ratio
if plot_inducing: if plot_inducing and hasattr(model, 'Z'):
ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w') Z = model.Z
ax.scatter(Z[:, input_1], Z[:, input_2], c='w', s=18, marker="^", edgecolor='k', linewidth=.3, alpha=.7)
try:
fig.canvas.draw()
fig.tight_layout()
fig.canvas.draw()
except Exception as e:
print("Could not invoke tight layout: {}".format(e))
pass
if updates: if updates:
fig.canvas.show() try:
raw_input('Enter to continue') fig.canvas.draw()
fig.canvas.show()
pb.title('Magnification Factor') except Exception as e:
print("Could not invoke show: {}".format(e))
#raw_input('Enter to continue')
return view
return ax return ax
@ -314,8 +399,8 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
this_label = 'class %i' % i this_label = 'class %i' % i
m = marker.next() m = marker.next()
index = np.nonzero(data_labels == ul)[0] index = np.nonzero(data_labels == ul)[0]
x = model.X[index, input_1] x = X[index, input_1]
y = model.X[index, input_2] y = X[index, input_2]
ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label) ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1) ax.set_xlabel('latent dimension %i' % input_1)
@ -323,7 +408,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
controller = ImAnnotateController(ax, controller = ImAnnotateController(ax,
plot_function, plot_function,
tuple(model.X.min(0)[:, significant_dims]) + tuple(model.X.max(0)[:, significant_dims]), tuple(X.min(0)[:, significant_dims]) + tuple(X.max(0)[:, significant_dims]),
resolution=resolution, resolution=resolution,
aspect=aspect, aspect=aspect,
cmap=get_cmap('jet'), cmap=get_cmap('jet'),

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
try: try:
import pylab as pb from matplotlib import pyplot as pb
except: except:
pass pass
#import numpy as np #import numpy as np

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import pylab as pb from matplotlib import pyplot as pb
import Tango import Tango
from matplotlib.textpath import TextPath from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy from matplotlib.transforms import offset_copy

View file

@ -9,6 +9,9 @@ class AxisEventController(object):
def __init__(self, ax): def __init__(self, ax):
self.ax = ax self.ax = ax
self.activate() self.activate()
def __del__(self):
self.deactivate()
return self
def deactivate(self): def deactivate(self):
for cb_class in self.ax.callbacks.callbacks.values(): for cb_class in self.ax.callbacks.callbacks.values():
for cb_num in cb_class.keys(): for cb_num in cb_class.keys():
@ -81,9 +84,9 @@ class BufferedAxisChangedController(AxisChangedController):
def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs): def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs):
""" """
Buffered axis changed controller. Controls the buffer and handles update events for when the axes changed. Buffered axis changed controller. Controls the buffer and handles update events for when the axes changed.
Updated plotting will be after first reload (first time will be within plot limits, after that the limits will be buffered) Updated plotting will be after first reload (first time will be within plot limits, after that the limits will be buffered)
:param plot_function: :param plot_function:
function to use for creating image for plotting (return ndarray-like) function to use for creating image for plotting (return ndarray-like)
plot_function gets called with (2D!) Xtest grid if replotting required plot_function gets called with (2D!) Xtest grid if replotting required

View file

@ -4,7 +4,7 @@
import numpy as np import numpy as np
try: try:
import Tango import Tango
import pylab as pb from matplotlib import pyplot as pb
except: except:
pass pass
from base_plots import x_frame1D, x_frame2D from base_plots import x_frame1D, x_frame2D

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
try: try:
import pylab as pb from matplotlib import pyplot as pb
from matplotlib.patches import Polygon from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection from matplotlib.collections import PatchCollection
#from matplotlib import cm #from matplotlib import cm

View file

@ -1,25 +1,82 @@
# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import Tango
import pylab as pb
except:
pass
import numpy as np import numpy as np
from base_plots import gpplot, x_frame1D, x_frame2D from . import Tango
from .base_plots import gpplot, x_frame1D, x_frame2D,gperrors
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from ...models.warped_gp import WarpedGP from ...models.warped_gp import WarpedGP
from scipy import sparse from scipy import sparse
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
from matplotlib import pyplot as plt
def plot_data(model, which_data_rows='all',
              which_data_ycols='all', visible_dims=None,
              fignum=None, ax=None, data_symbol='kx', mew=1.5):
    """
    Plot the training data of a GP model.

    Only one- and two-dimensional input spaces can be drawn; for models with
    more inputs choose (at most two) dimensions to show via ``visible_dims``.
    Subsets of the data can be selected with which_data_rows and
    which_data_ycols.

    :param which_data_rows: which of the training data to plot (default all)
    :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
    :param which_data_ycols: when the data has several columns (independant outputs), only plot these
    :type which_data_rows: 'all' or a list of integers
    :param visible_dims: an array specifying the input dimensions to plot (maximum two)
    :type visible_dims: a numpy array
    :param fignum: figure to plot on.
    :type fignum: figure number
    :param ax: axes to plot on.
    :type ax: axes handle
    :returns: dict holding the created matplotlib artists under 'dataplot'
    """
    # Resolve the 'all' shorthands into concrete indexers.
    if which_data_rows == 'all':
        which_data_rows = slice(None)
    if which_data_ycols == 'all':
        which_data_ycols = np.arange(model.output_dim)
    if ax is None:
        ax = plt.figure(num=fignum).add_subplot(111)

    # Training data.
    X, Y = model.X, model.Y

    # By default show every input dimension (there must be at most two).
    if visible_dims is None:
        visible_dims = np.arange(model.input_dim)
    assert visible_dims.size <= 2, "Visible inputs cannot be larger than two"
    free_dims = visible_dims

    plots = {}
    n_free = len(free_dims)
    if n_free == 1:
        # One input dimension: plain x-vs-y plot, one pass per output column.
        for col in which_data_ycols:
            plots['dataplot'] = ax.plot(X[which_data_rows, free_dims],
                                        Y[which_data_rows, col],
                                        data_symbol, mew=mew)
    elif n_free == 2:
        # Two input dimensions: scatter in the input plane, coloured by output.
        for col in which_data_ycols:
            plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]],
                                           X[which_data_rows, free_dims[1]], 40,
                                           Y[which_data_rows, col], cmap=plt.cm.jet,
                                           vmin=Y.min(), vmax=Y.max(), linewidth=0.)
    else:
        raise NotImplementedError("Cannot define a frame with more than two input dimensions")
    return plots
def plot_fit(model, plot_limits=None, which_data_rows='all', def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None, levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False, plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx', linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None): apply_link=False, samples_y=0, plot_uncertain_inputs=True, predict_kw=None, plot_training_data=True):
""" """
Plot the posterior of the GP. Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations. - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -37,25 +94,32 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
:type which_data_rows: 'all' or a list of integers :type which_data_rows: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples :type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int :type levels: int
:param samples: the number of a posteriori samples to plot p(y*|y) :param samples: the number of a posteriori samples to plot p(f*|y)
:type samples: int :type samples: int
:param fignum: figure to plot on. :param fignum: figure to plot on.
:type fignum: figure number :type fignum: figure number
:param ax: axes to plot on. :param ax: axes to plot on.
:type ax: axes handle :type ax: axes handle
:type output: integer (first output is 0) :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param plot_raw: Whether to plot the raw function p(f|y)
:type plot_raw: boolean
:param linecol: color of line to plot. :param linecol: color of line to plot.
:type linecol: :type linecol: hex or color
:param fillcol: color of fill :param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure :type fillcol: hex or color
:param apply_link: apply the link function if plotting f (default false) :param apply_link: apply the link function if plotting f (default false), as well as posterior samples if requested
:type apply_link: boolean :type apply_link: boolean
:param samples_f: the number of posteriori f samples to plot p(f*|y) :param samples_y: the number of posteriori f samples to plot p(y*|y)
:type samples_f: int :type samples_y: int
:param plot_uncertain_inputs: plot the uncertainty of the inputs as error bars if they have uncertainty (BGPLVM etc.)
:type plot_uncertain_inputs: boolean
:param predict_kw: keyword args for _raw_predict and predict functions if required
:type predict_kw: dict
:param plot_training_data: whether or not to plot the training points
:type plot_training_data: boolean
""" """
#deal with optional arguments #deal with optional arguments
if which_data_rows == 'all': if which_data_rows == 'all':
@ -65,7 +129,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#if len(which_data_ycols)==0: #if len(which_data_ycols)==0:
#raise ValueError('No data selected for plotting') #raise ValueError('No data selected for plotting')
if ax is None: if ax is None:
fig = pb.figure(num=fignum) fig = plt.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
@ -117,31 +181,38 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
Y_metadata = {'output_index': extra_data} Y_metadata = {'output_index': extra_data}
else: else:
Y_metadata['output_index'] = extra_data Y_metadata['output_index'] = extra_data
if isinstance(model, WarpedGP): if isinstance(model, WarpedGP):
m, v = model.predict(Xgrid, full_cov=False, median=True, Y_metadata=Y_metadata, **predict_kw) m, v = model.predict(Xgrid, full_cov=False, median=True, Y_metadata=Y_metadata, **predict_kw)
#print np.concatenate((Xgrid, m), axis=1) #print np.concatenate((Xgrid, m), axis=1)
else: else:
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw) m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata) fmu, fv = model._raw_predict(Xgrid, full_cov=False, **predict_kw)
lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata)
for d in which_data_ycols: for d in which_data_ycols:
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5) #if not plot_raw: plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=1.5)
if not plot_raw and plot_training_data:
plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows,
visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)
#optionally plot some samples #optionally plot some samples
if samples: #NOTE not tested with fixed_inputs if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples, Y_metadata=Y_metadata) Fsim = model.posterior_samples_f(Xgrid, samples)
print Ysim.shape if apply_link:
print Xnew.shape Fsim = model.likelihood.gp_link.transf(Fsim)
for yi in Ysim.T:
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
if samples_f: #NOTE not tested with fixed_inputs
Fsim = model.posterior_samples_f(Xgrid, samples_f)
for fi in Fsim.T: for fi in Fsim.T:
plots['posterior_samples_f'] = ax.plot(Xnew, fi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) plots['posterior_samples'] = ax.plot(Xnew, fi[:,None], '#3300FF', linewidth=0.25)
#ax.plot(Xnew, fi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
if samples_y: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples_y, Y_metadata=Y_metadata)
for yi in Ysim.T:
plots['posterior_samples_y'] = ax.scatter(Xnew, yi[:,None], s=5, c=Tango.colorsHex['darkBlue'], marker='o', alpha=0.5)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
@ -206,8 +277,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw) m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
for d in which_data_ycols: for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T m_d = m[:,d].reshape(resolution, resolution).T
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=plt.cm.jet)
if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) #if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
if not plot_raw and plot_training_data:
plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values #set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim(xmin[0], xmax[0])
@ -272,3 +345,82 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_
return f_inputs return f_inputs
else: else:
return X return X
def errorbars_trainset(model, which_data_rows='all',
                       which_data_ycols='all', fixed_inputs=None,
                       fignum=None, ax=None,
                       linecol='red', data_symbol='kx',
                       predict_kw=None, plot_training_data=True, **kwargs):
    """
    Plot the posterior error bars corresponding to the training data
      - For higher dimensions than two, use fixed_inputs to plot the data points with some of the inputs fixed.

    Can plot only part of the data
    using which_data_rows and which_data_ycols.

    :param which_data_rows: which of the training data to plot (default all)
    :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
    :param which_data_ycols: when the data has several columns (independant outputs), only plot these
    :type which_data_rows: 'all' or a list of integers
    :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
    :type fixed_inputs: a list of tuples (default None, i.e. no fixed inputs)
    :param fignum: figure to plot on.
    :type fignum: figure number
    :param ax: axes to plot on.
    :type ax: axes handle
    :param linecol: color of the error bars
    :param predict_kw: keyword args passed through to the predict calls
    :type predict_kw: dict
    :param plot_training_data: whether or not to plot the training points
    :type plot_training_data: boolean
    :returns: dict of created plot artists
    """
    #deal with optional arguments
    if which_data_rows == 'all':
        which_data_rows = slice(None)
    if which_data_ycols == 'all':
        which_data_ycols = np.arange(model.output_dim)
    # Default changed from a mutable `[]` literal to a None sentinel to avoid
    # the shared mutable-default-argument pitfall; behaviour is unchanged.
    if fixed_inputs is None:
        fixed_inputs = []

    if ax is None:
        fig = plt.figure(num=fignum)
        ax = fig.add_subplot(111)

    X = model.X
    Y = model.Y

    if predict_kw is None:
        predict_kw = {}

    #work out what the inputs are for plotting (1D or 2D)
    fixed_dims = np.array([i for i, v in fixed_inputs])
    free_dims = np.setdiff1d(np.arange(model.input_dim), fixed_dims)

    plots = {}
    #one dimensional plotting
    if len(free_dims) == 1:
        # Predictive mean and (2.5, 97.5) quantiles at the training inputs.
        m, v = model.predict(X, full_cov=False, Y_metadata=model.Y_metadata, **predict_kw)
        fmu, fv = model._raw_predict(X, full_cov=False, **predict_kw)
        lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=model.Y_metadata)
        for d in which_data_ycols:
            plots['gperrors'] = gperrors(X, m[:, d], lower[:, d], upper[:, d], edgecol=linecol, ax=ax, fignum=fignum, **kwargs)
        if plot_training_data:
            plots['dataplot'] = plot_data(model=model, which_data_rows=which_data_rows,
                                          visible_dims=free_dims, data_symbol=data_symbol, mew=1.5, ax=ax, fignum=fignum)

        #set the limits of the plot to some sensible values
        ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
        ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
        ax.set_xlim(X[:, free_dims].min(), X[:, free_dims].max())
        ax.set_ylim(ymin, ymax)
    elif len(free_dims) == 2:
        raise NotImplementedError("Not implemented yet")
    else:
        raise NotImplementedError("Cannot define a frame with more than two input dimensions")
    return plots

View file

@ -4,7 +4,7 @@
import numpy as np import numpy as np
try: try:
import pylab as pb from matplotlib import pyplot as pb
except: except:
pass pass

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import pylab as pb from matplotlib import pyplot as pb
def plot(model, ax=None, fignum=None, Z_height=None, **kwargs): def plot(model, ax=None, fignum=None, Z_height=None, **kwargs):

View file

@ -1,4 +1,4 @@
import pylab as pb, numpy as np from matplotlib import pyplot as pb, numpy as np
def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)): def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)):
""" """

View file

@ -0,0 +1,109 @@
'''
Created on 4 Sep 2015
@author: maxz
'''
import unittest
import numpy as np
import GPy
class BGPLVMTest(unittest.TestCase):
    """Check that BayesianGPLVMMiniBatch reproduces the full BayesianGPLVM.

    For every combination of missing-data handling and stochastic updates
    (with a batch covering the whole data set) the minibatch implementation
    must yield exactly the same likelihood, gradients and predictions as the
    reference full model built in :meth:`setUp`.
    """

    def setUp(self):
        np.random.seed(12345)  # reproducible data and model initialisation
        X, W = np.random.normal(0, 1, (100, 6)), np.random.normal(0, 1, (6, 13))
        Y = X.dot(W) + np.random.normal(0, .1, (X.shape[0], W.shape[1]))
        # NOTE: self.inan is unused below, but the binomial draw is kept so the
        # RNG stream (and hence the full model's initialisation) is unchanged.
        self.inan = np.random.binomial(1, .1, Y.shape).astype(bool)
        self.X, self.W, self.Y = X, W, Y
        self.Q = 3
        self.m_full = GPy.models.BayesianGPLVM(Y, self.Q)

    def _new_minibatch_model(self, **model_kw):
        """Build a minibatch model with the given settings, copy in the full
        model's parameters and assert likelihood/gradients agree exactly.

        Returns the minibatch model for further checks."""
        m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, **model_kw)
        m[:] = self.m_full[:]
        np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
        np.testing.assert_allclose(m.gradient, self.m_full.gradient)
        assert m.checkgrad()
        return m

    def test_lik_comparisons_m1_s0(self):
        # missing_data=True, stochastic=False must match the full model.
        self._new_minibatch_model(missing_data=True, stochastic=False)

    def test_lik_comparisons_m0_s0(self):
        # missing_data=False, stochastic=False must match the full model.
        self._new_minibatch_model(missing_data=False, stochastic=False)

    def test_lik_comparisons_m1_s1(self):
        # missing_data=True, stochastic=True (full-data batch) must match.
        self._new_minibatch_model(missing_data=True, stochastic=True, batchsize=self.Y.shape[1])

    def test_lik_comparisons_m0_s1(self):
        # missing_data=False, stochastic=True (full-data batch) must match.
        self._new_minibatch_model(missing_data=False, stochastic=True, batchsize=self.Y.shape[1])

    def test_predict_missing_data(self):
        m = self._new_minibatch_model(missing_data=True, stochastic=True, batchsize=self.Y.shape[1])

        # full_cov=True on the variational posterior input is not supported.
        self.assertRaises(NotImplementedError, m.predict, m.X, full_cov=True)

        # Predictions at the (uncertain) training inputs must agree.
        mu1, var1 = m.predict(m.X, full_cov=False)
        mu2, var2 = self.m_full.predict(self.m_full.X, full_cov=False)
        np.testing.assert_allclose(mu1, mu2)
        np.testing.assert_allclose(var1, var2)

        # Predictions at the posterior means, with and without full covariance.
        mu1, var1 = m.predict(m.X.mean, full_cov=True)
        mu2, var2 = self.m_full.predict(self.m_full.X.mean, full_cov=True)
        np.testing.assert_allclose(mu1, mu2)
        np.testing.assert_allclose(var1[:, :, 0], var2)

        mu1, var1 = m.predict(m.X.mean, full_cov=False)
        mu2, var2 = self.m_full.predict(self.m_full.X.mean, full_cov=False)
        np.testing.assert_allclose(mu1, mu2)
        np.testing.assert_allclose(var1[:, [0]], var2)

    def test_gradients_missingdata(self):
        m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=False, batchsize=self.Y.shape[1])
        assert m.checkgrad()

    def test_gradients_missingdata_stochastics(self):
        # Gradients must check out for small batch sizes as well.
        for batchsize in (1, 4):
            m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=True, stochastic=True, batchsize=batchsize)
            assert m.checkgrad()

    def test_gradients_stochastics(self):
        for batchsize in (1, 4):
            m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=True, batchsize=batchsize)
            assert m.checkgrad()

    def test_predict(self):
        # Same configuration as test_lik_comparisons_m1_s1; kept as its own test name.
        self._new_minibatch_model(missing_data=True, stochastic=True, batchsize=self.Y.shape[1])


if __name__ == "__main__":
    unittest.main()

View file

@ -0,0 +1,37 @@
'''
Created on 4 Sep 2015
@author: maxz
'''
import unittest
from GPy.util.caching import Cacher
from pickle import PickleError
class Test(unittest.TestCase):
    """Unit tests for the ``Cacher`` utility wrapper."""

    def setUp(self):
        # A trivial identity operation is enough to exercise the cache API.
        def identity(value):
            return value
        self.cache = Cacher(identity, 1)

    def test_pickling(self):
        # Cacher instances deliberately refuse to be (un)pickled.
        for method in (self.cache.__getstate__, self.cache.__setstate__):
            self.assertRaises(PickleError, method)

    def test_copy(self):
        # A deep copy shares the operation but is an independent cache.
        clone = self.cache.__deepcopy__()
        assert clone.operation is self.cache.operation
        self.assertEqual(clone.limit, self.cache.limit)

    def test_reset(self):
        # After reset all internal bookkeeping dicts must be empty.
        self.cache.reset()
        for store in (self.cache.cached_input_ids,
                      self.cache.cached_outputs,
                      self.cache.inputs_changed):
            self.assertDictEqual(store, {})

    def test_name(self):
        # The cache proxies the wrapped operation's name.
        assert self.cache.__name__ == self.cache.operation.__name__


if __name__ == "__main__":
    unittest.main()

View file

@ -2,11 +2,21 @@ import numpy as np
import scipy as sp import scipy as sp
from GPy.util import choleskies from GPy.util import choleskies
import GPy import GPy
from ..util.config import config
import unittest
try:
from ..util import linalg_cython
from ..util import choleskies_cython
config.set('cython', 'working', 'True')
except ImportError:
config.set('cython', 'working', 'False')
""" """
These tests make sure that the opure python and cython codes work the same These tests make sure that the pure python and cython codes work the same
""" """
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
class CythonTestChols(np.testing.TestCase): class CythonTestChols(np.testing.TestCase):
def setUp(self): def setUp(self):
self.flat = np.random.randn(45,5) self.flat = np.random.randn(45,5)
@ -20,6 +30,7 @@ class CythonTestChols(np.testing.TestCase):
A2 = choleskies._triang_to_flat_cython(self.triang) A2 = choleskies._triang_to_flat_cython(self.triang)
np.testing.assert_allclose(A1, A2) np.testing.assert_allclose(A1, A2)
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
class test_stationary(np.testing.TestCase): class test_stationary(np.testing.TestCase):
def setUp(self): def setUp(self):
self.k = GPy.kern.RBF(10) self.k = GPy.kern.RBF(10)
@ -49,17 +60,16 @@ class test_stationary(np.testing.TestCase):
g2 = self.k._lengthscale_grads_cython(self.dKxz, self.X, self.Z) g2 = self.k._lengthscale_grads_cython(self.dKxz, self.X, self.Z)
np.testing.assert_allclose(g1, g2) np.testing.assert_allclose(g1, g2)
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
class test_choleskies_backprop(np.testing.TestCase): class test_choleskies_backprop(np.testing.TestCase):
def setUp(self): def setUp(self):
self.dL, self.L = np.random.randn(2, 100, 100) a =np.random.randn(10,12)
A = a.dot(a.T)
self.L = GPy.util.linalg.jitchol(A)
self.dL = np.random.randn(10,10)
def test(self): def test(self):
r1 = GPy.util.choleskies._backprop_gradient_pure(self.dL, self.L) r1 = choleskies._backprop_gradient_pure(self.dL, self.L)
r2 = GPy.util.choleskies.choleskies_cython.backprop_gradient(self.dL, self.L) r2 = choleskies_cython.backprop_gradient(self.dL, self.L)
r3 = choleskies_cython.backprop_gradient_par_c(self.dL, self.L)
np.testing.assert_allclose(r1, r2) np.testing.assert_allclose(r1, r2)
np.testing.assert_allclose(r1, r3)

99
GPy/testing/gp_tests.py Normal file
View file

@ -0,0 +1,99 @@
'''
Created on 4 Sep 2015
@author: maxz
'''
import unittest
import numpy as np, GPy
from GPy.core.parameterization.variational import NormalPosterior
class Test(unittest.TestCase):
def setUp(self):
np.random.seed(12345)
self.N = 20
self.N_new = 50
self.D = 1
self.X = np.random.uniform(-3., 3., (self.N, 1))
self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
def test_setxy_bgplvm(self):
k = GPy.kern.RBF(1)
m = GPy.models.BayesianGPLVM(self.Y, 2, kernel=k)
mu, var = m.predict(m.X)
X = m.X.copy()
Xnew = NormalPosterior(m.X.mean[:10].copy(), m.X.variance[:10].copy())
m.set_XY(Xnew, m.Y[:10])
assert(m.checkgrad())
m.set_XY(X, self.Y)
mu2, var2 = m.predict(m.X)
np.testing.assert_allclose(mu, mu2)
np.testing.assert_allclose(var, var2)
def test_setxy_gplvm(self):
k = GPy.kern.RBF(1)
m = GPy.models.GPLVM(self.Y, 2, kernel=k)
mu, var = m.predict(m.X)
X = m.X.copy()
Xnew = X[:10].copy()
m.set_XY(Xnew, m.Y[:10])
assert(m.checkgrad())
m.set_XY(X, self.Y)
mu2, var2 = m.predict(m.X)
np.testing.assert_allclose(mu, mu2)
np.testing.assert_allclose(var, var2)
def test_setxy_gp(self):
k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
mu, var = m.predict(m.X)
X = m.X.copy()
m.set_XY(m.X[:10], m.Y[:10])
assert(m.checkgrad())
m.set_XY(X, self.Y)
mu2, var2 = m.predict(m.X)
np.testing.assert_allclose(mu, mu2)
np.testing.assert_allclose(var, var2)
def test_mean_function(self):
from GPy.core.parameterization.param import Param
from GPy.core.mapping import Mapping
class Parabola(Mapping):
def __init__(self, variance, degree=2, name='parabola'):
super(Parabola, self).__init__(1, 1, name)
self.variance = Param('variance', np.ones(degree+1) * variance)
self.degree = degree
self.link_parameter(self.variance)
def f(self, X):
p = self.variance[0] * np.ones(X.shape)
for i in range(1, self.degree+1):
p += self.variance[i] * X**(i)
return p
def gradients_X(self, dL_dF, X):
grad = np.zeros(X.shape)
for i in range(1, self.degree+1):
grad += (i) * self.variance[i] * X**(i-1)
return grad
def update_gradients(self, dL_dF, X):
for i in range(self.degree+1):
self.variance.gradient[i] = (dL_dF * X**(i)).sum(0)
X = np.linspace(-2, 2, 100)[:, None]
k = GPy.kern.RBF(1)
k.randomize()
p = Parabola(.3)
p.randomize()
Y = p.f(X) + np.random.multivariate_normal(np.zeros(X.shape[0]), k.K(X)+np.eye(X.shape[0])*1e-8)[:,None] + np.random.normal(0, .1, (X.shape[0], 1))
m = GPy.models.GPRegression(X, Y, mean_function=p)
m.randomize()
assert(m.checkgrad())
_ = m.predict(m.X)
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']
unittest.main()

View file

@ -8,11 +8,12 @@ The test cases for various inference algorithms
import unittest, itertools import unittest, itertools
import numpy as np import numpy as np
import GPy import GPy
#np.seterr(invalid='raise')
class InferenceXTestCase(unittest.TestCase): class InferenceXTestCase(unittest.TestCase):
def genData(self): def genData(self):
np.random.seed(1)
D1,D2,N = 12,12,50 D1,D2,N = 12,12,50
x = np.linspace(0, 4 * np.pi, N)[:, None] x = np.linspace(0, 4 * np.pi, N)[:, None]

View file

@ -6,9 +6,16 @@ import numpy as np
import GPy import GPy
import sys import sys
from GPy.core.parameterization.param import Param from GPy.core.parameterization.param import Param
from ..util.config import config
verbose = 0 verbose = 0
try:
from ..util import linalg_cython
config.set('cython', 'working', 'True')
except ImportError:
config.set('cython', 'working', 'False')
class Kern_check_model(GPy.core.Model): class Kern_check_model(GPy.core.Model):
""" """
@ -245,6 +252,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
continuous_kerns = ['RBF', 'Linear'] continuous_kerns = ['RBF', 'Linear']
self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns] self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
def test_MLP(self):
k = GPy.kern.MLP(self.D,ARD=True)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Matern32(self): def test_Matern32(self):
k = GPy.kern.Matern32(self.D) k = GPy.kern.Matern32(self.D)
k.randomize() k.randomize()
@ -313,6 +325,11 @@ class KernelGradientTestsContinuous(unittest.TestCase):
k.randomize() k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_standard_periodic(self):
k = GPy.kern.StdPeriodic(self.D, self.D-1)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
class KernelTestsMiscellaneous(unittest.TestCase): class KernelTestsMiscellaneous(unittest.TestCase):
def setUp(self): def setUp(self):
N, D = 100, 10 N, D = 100, 10
@ -366,6 +383,7 @@ class KernelTestsNonContinuous(unittest.TestCase):
X2 = self.X2[self.X2[:,-1]!=2] X2 = self.X2[self.X2[:,-1]!=2]
self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1)) self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
@unittest.skipIf(not config.getboolean('cython', 'working'),"Cython modules have not been built on this machine")
class Coregionalize_cython_test(unittest.TestCase): class Coregionalize_cython_test(unittest.TestCase):
""" """
Make sure that the coregionalize kernel work with and without cython enabled Make sure that the coregionalize kernel work with and without cython enabled
@ -432,6 +450,104 @@ class KernelTestsProductWithZeroValues(unittest.TestCase):
self.assertFalse(np.any(np.isnan(target)), self.assertFalse(np.any(np.isnan(target)),
"Gradient resulted in NaN") "Gradient resulted in NaN")
class Kernel_Psi_statistics_GradientTests(unittest.TestCase):
def setUp(self):
from GPy.core.parameterization.variational import NormalPosterior
N,M,Q = 100,20,3
X = np.random.randn(N,Q)
X_var = np.random.rand(N,Q)+0.01
self.Z = np.random.randn(M,Q)
self.qX = NormalPosterior(X, X_var)
self.w1 = np.random.randn(N)
self.w2 = np.random.randn(N,M)
self.w3 = np.random.randn(M,M)
self.w3 = self.w3+self.w3.T
self.w3n = np.random.randn(N,M,M)
self.w3n = self.w3n+np.swapaxes(self.w3n, 1,2)
def test_kernels(self):
from GPy.kern import RBF,Linear,MLP
Q = self.Z.shape[1]
kernels = [RBF(Q,ARD=True), Linear(Q,ARD=True)]
for k in kernels:
k.randomize()
self._test_kernel_param(k)
self._test_Z(k)
self._test_qX(k)
self._test_kernel_param(k, psi2n=True)
self._test_Z(k, psi2n=True)
self._test_qX(k, psi2n=True)
def _test_kernel_param(self, kernel, psi2n=False):
def f(p):
kernel.param_array[:] = p
psi0 = kernel.psi0(self.Z, self.qX)
psi1 = kernel.psi1(self.Z, self.qX)
if not psi2n:
psi2 = kernel.psi2(self.Z, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
else:
psi2 = kernel.psi2n(self.Z, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
def df(p):
kernel.param_array[:] = p
kernel.update_gradients_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, self.Z, self.qX)
return kernel.gradient.copy()
from GPy.models import GradientChecker
m = GradientChecker(f, df, kernel.param_array.copy())
self.assertTrue(m.checkgrad())
def _test_Z(self, kernel, psi2n=False):
def f(p):
psi0 = kernel.psi0(p, self.qX)
psi1 = kernel.psi1(p, self.qX)
psi2 = kernel.psi2(p, self.qX)
if not psi2n:
psi2 = kernel.psi2(p, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
else:
psi2 = kernel.psi2n(p, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
def df(p):
return kernel.gradients_Z_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, p, self.qX)
from GPy.models import GradientChecker
m = GradientChecker(f, df, self.Z.copy())
self.assertTrue(m.checkgrad())
def _test_qX(self, kernel, psi2n=False):
def f(p):
self.qX.param_array[:] = p
self.qX._trigger_params_changed()
psi0 = kernel.psi0(self.Z, self.qX)
psi1 = kernel.psi1(self.Z, self.qX)
if not psi2n:
psi2 = kernel.psi2(self.Z, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3*psi2).sum()
else:
psi2 = kernel.psi2n(self.Z, self.qX)
return (self.w1*psi0).sum() + (self.w2*psi1).sum() + (self.w3n*psi2).sum()
def df(p):
self.qX.param_array[:] = p
self.qX._trigger_params_changed()
grad = kernel.gradients_qX_expectations(self.w1, self.w2, self.w3 if not psi2n else self.w3n, self.Z, self.qX)
self.qX.set_gradients(grad)
return self.qX.gradient.copy()
from GPy.models import GradientChecker
m = GradientChecker(f, df, self.qX.param_array.copy())
self.assertTrue(m.checkgrad())
if __name__ == "__main__": if __name__ == "__main__":
print("Running unit tests, please be (very) patient...") print("Running unit tests, please be (very) patient...")

View file

@ -7,10 +7,8 @@ from GPy.models import GradientChecker
import functools import functools
import inspect import inspect
from GPy.likelihoods import link_functions from GPy.likelihoods import link_functions
from GPy.core.parameterization import Param
from functools import partial from functools import partial
#np.random.seed(300) fixed_seed = 7
#np.random.seed(4)
#np.seterr(divide='raise') #np.seterr(divide='raise')
def dparam_partial(inst_func, *args): def dparam_partial(inst_func, *args):
@ -105,6 +103,7 @@ class TestNoiseModels(object):
Generic model checker Generic model checker
""" """
def setUp(self): def setUp(self):
np.random.seed(fixed_seed)
self.N = 15 self.N = 15
self.D = 3 self.D = 3
self.X = np.random.rand(self.N, self.D)*10 self.X = np.random.rand(self.N, self.D)*10
@ -218,7 +217,8 @@ class TestNoiseModels(object):
"constraints": [(".*variance", self.constrain_positive)] "constraints": [(".*variance", self.constrain_positive)]
}, },
"laplace": True, "laplace": True,
"ep": False # FIXME: Should be True when we have it working again "ep": False, # FIXME: Should be True when we have it working again
"variational_expectations": True,
}, },
"Gaussian_log": { "Gaussian_log": {
"model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var), "model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
@ -227,7 +227,8 @@ class TestNoiseModels(object):
"vals": [self.var], "vals": [self.var],
"constraints": [(".*variance", self.constrain_positive)] "constraints": [(".*variance", self.constrain_positive)]
}, },
"laplace": True "laplace": True,
"variational_expectations": True
}, },
#"Gaussian_probit": { #"Gaussian_probit": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N), #"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
@ -252,7 +253,8 @@ class TestNoiseModels(object):
"link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)], "link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
"laplace": True, "laplace": True,
"Y": self.binary_Y, "Y": self.binary_Y,
"ep": False # FIXME: Should be True when we have it working again "ep": False, # FIXME: Should be True when we have it working again
"variational_expectations": True
}, },
"Exponential_default": { "Exponential_default": {
"model": GPy.likelihoods.Exponential(), "model": GPy.likelihoods.Exponential(),
@ -347,6 +349,10 @@ class TestNoiseModels(object):
ep = attributes["ep"] ep = attributes["ep"]
else: else:
ep = False ep = False
if "variational_expectations" in attributes:
var_exp = attributes["variational_expectations"]
else:
var_exp = False
#if len(param_vals) > 1: #if len(param_vals) > 1:
#raise NotImplementedError("Cannot support multiple params in likelihood yet!") #raise NotImplementedError("Cannot support multiple params in likelihood yet!")
@ -377,6 +383,11 @@ class TestNoiseModels(object):
if ep: if ep:
#ep likelihood gradcheck #ep likelihood gradcheck
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
if var_exp:
#Need to specify mu and var!
yield self.t_varexp, model, Y, Y_metadata
yield self.t_dexp_dmu, model, Y, Y_metadata
yield self.t_dexp_dvar, model, Y, Y_metadata
self.tearDown() self.tearDown()
@ -603,6 +614,87 @@ class TestNoiseModels(object):
print(m) print(m)
assert m.checkgrad(verbose=1, step=step) assert m.checkgrad(verbose=1, step=step)
################
# variational expectations #
################
@with_setup(setUp, tearDown)
def t_varexp(self, model, Y, Y_metadata):
#Test that the analytic implementation (if it exists) matches the generic gauss
#hermite implementation
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = model.variational_expectations(Y=Y, m=mu, v=var, gh_points=None, Y_metadata=Y_metadata)[0]
#Implementation of gauss hermite integration
shape = mu.shape
gh_x, gh_w= np.polynomial.hermite.hermgauss(50)
m,v,Y = mu.flatten(), var.flatten(), Y.flatten()
#make a grid of points
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
#evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
# broadcast needs to be handled carefully.
logp = model.logpdf(X, Y[:,None], Y_metadata=Y_metadata)
#average over the gird to get derivatives of the Gaussian's parameters
#division by pi comes from fact that for each quadrature we need to scale by 1/sqrt(pi)
expectation_gh = np.dot(logp, gh_w)/np.sqrt(np.pi)
expectation_gh = expectation_gh.reshape(*shape)
np.testing.assert_almost_equal(expectation, expectation_gh, decimal=5)
@with_setup(setUp, tearDown)
def t_dexp_dmu(self, model, Y, Y_metadata):
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = functools.partial(model.variational_expectations, Y=Y, v=var, gh_points=None, Y_metadata=Y_metadata)
#Function to get the nth returned value
def F(mu):
return expectation(m=mu)[0]
def dmu(mu):
return expectation(m=mu)[1]
grad = GradientChecker(F, dmu, mu.copy(), 'm')
grad.randomize()
print(grad)
print(model)
assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown)
def t_dexp_dvar(self, model, Y, Y_metadata):
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = functools.partial(model.variational_expectations, Y=Y, m=mu, gh_points=None, Y_metadata=Y_metadata)
#Function to get the nth returned value
def F(var):
return expectation(v=var)[0]
def dvar(var):
return expectation(v=var)[2]
grad = GradientChecker(F, dvar, var.copy(), 'v')
self.constrain_positive('v', grad)
#grad.randomize()
print(grad)
print(model)
assert grad.checkgrad(verbose=1)
class LaplaceTests(unittest.TestCase): class LaplaceTests(unittest.TestCase):
""" """
@ -610,6 +702,7 @@ class LaplaceTests(unittest.TestCase):
""" """
def setUp(self): def setUp(self):
np.random.seed(fixed_seed)
self.N = 15 self.N = 15
self.D = 1 self.D = 1
self.X = np.random.rand(self.N, self.D)*10 self.X = np.random.rand(self.N, self.D)*10
@ -705,7 +798,7 @@ class LaplaceTests(unittest.TestCase):
post_mean_approx, post_var_approx, = m2.predict(X) post_mean_approx, post_var_approx, = m2.predict(X)
if debug: if debug:
import pylab as pb from matplotlib import pyplot as pb
pb.figure(5) pb.figure(5)
pb.title('posterior means') pb.title('posterior means')
pb.scatter(X, post_mean, c='g') pb.scatter(X, post_mean, c='g')

View file

@ -1,7 +1,6 @@
import numpy as np import numpy as np
import scipy as sp import scipy as sp
from GPy.util.linalg import jitchol from ..util.linalg import jitchol,trace_dot, ijk_jlk_to_il, ijk_ljk_to_ilk
import GPy
class LinalgTests(np.testing.TestCase): class LinalgTests(np.testing.TestCase):
def setUp(self): def setUp(self):
@ -37,18 +36,19 @@ class LinalgTests(np.testing.TestCase):
except sp.linalg.LinAlgError: except sp.linalg.LinAlgError:
return True return True
def test_einsum_ijk_jlk_to_il(self): def test_trace_dot(self):
A = np.random.randn(50, 150, 5) N = 5
B = np.random.randn(150, 100, 5) A = np.random.rand(N,N)
pure = np.einsum('ijk,jlk->il', A, B) B = np.random.rand(N,N)
quick = GPy.util.linalg.ijk_jlk_to_il(A, B) trace = np.trace(A.dot(B))
np.testing.assert_allclose(pure, quick) test_trace = trace_dot(A,B)
np.testing.assert_allclose(trace,test_trace,atol=1e-13)
def test_einsum_ij_jlk_to_ilk(self): def test_einsum_ij_jlk_to_ilk(self):
A = np.random.randn(15, 150, 5) A = np.random.randn(15, 150, 5)
B = np.random.randn(150, 50, 5) B = np.random.randn(150, 50, 5)
pure = np.einsum('ijk,jlk->il', A, B) pure = np.einsum('ijk,jlk->il', A, B)
quick = GPy.util.linalg.ijk_jlk_to_il(A,B) quick = ijk_jlk_to_il(A,B)
np.testing.assert_allclose(pure, quick) np.testing.assert_allclose(pure, quick)
def test_einsum_ijk_ljk_to_ilk(self): def test_einsum_ijk_ljk_to_ilk(self):
@ -56,5 +56,5 @@ class LinalgTests(np.testing.TestCase):
B = np.random.randn(150, 20, 5) B = np.random.randn(150, 20, 5)
#B = A.copy() #B = A.copy()
pure = np.einsum('ijk,ljk->ilk', A, B) pure = np.einsum('ijk,ljk->ilk', A, B)
quick = GPy.util.linalg.ijk_ljk_to_ilk(A,B) quick = ijk_ljk_to_ilk(A,B)
np.testing.assert_allclose(pure, quick) np.testing.assert_allclose(pure, quick)

View file

@ -1,5 +1,5 @@
import numpy as np import numpy as np
import scipy as sp import scipy
from scipy.special import cbrt from scipy.special import cbrt
from GPy.models import GradientChecker from GPy.models import GradientChecker
_lim_val = np.finfo(np.float64).max _lim_val = np.finfo(np.float64).max
@ -79,8 +79,7 @@ class LinkFunctionTests(np.testing.TestCase):
assert np.isinf(np.exp(np.log(self.f_upper_lim))) assert np.isinf(np.exp(np.log(self.f_upper_lim)))
#Check the clipping works #Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5) np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals self.assertTrue(np.isfinite(link.transf(self.f_upper_lim)))
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
self.check_overflow(link, lim_of_inf) self.check_overflow(link, lim_of_inf)
#Check that it would otherwise fail #Check that it would otherwise fail
@ -93,18 +92,18 @@ class LinkFunctionTests(np.testing.TestCase):
link = Log_ex_1() link = Log_ex_1()
lim_of_inf = _lim_val_exp lim_of_inf = _lim_val_exp
np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f)) np.testing.assert_almost_equal(scipy.special.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
assert np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim)))) assert np.isinf(scipy.special.log1p(np.exp(np.log(self.f_upper_lim))))
#Check the clipping works #Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5) np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals #Need to look at most significant figures here rather than the decimals
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5) np.testing.assert_approx_equal(link.transf(self.f_upper_lim), scipy.special.log1p(_lim_val), significant=5)
self.check_overflow(link, lim_of_inf) self.check_overflow(link, lim_of_inf)
#Check that it would otherwise fail #Check that it would otherwise fail
beyond_lim_of_inf = lim_of_inf + 10.0 beyond_lim_of_inf = lim_of_inf + 10.0
old_err_state = np.seterr(over='ignore') old_err_state = np.seterr(over='ignore')
self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf)))) self.assertTrue(np.isinf(scipy.special.log1p(np.exp(beyond_lim_of_inf))))
np.seterr(**old_err_state) np.seterr(**old_err_state)

View file

@ -1,6 +1,8 @@
from __future__ import print_function
import numpy as np import numpy as np
import scipy as sp import scipy as sp
import GPy import GPy
import warnings
class MiscTests(np.testing.TestCase): class MiscTests(np.testing.TestCase):
""" """
@ -11,8 +13,15 @@ class MiscTests(np.testing.TestCase):
self._lim_val_exp = np.log(self._lim_val) self._lim_val_exp = np.log(self._lim_val)
def test_safe_exp_upper(self): def test_safe_exp_upper(self):
assert np.exp(self._lim_val_exp + 1) == np.inf with warnings.catch_warnings(record=True) as w:
assert GPy.util.misc.safe_exp(self._lim_val_exp + 1) < np.inf warnings.simplefilter('always') # always print
assert np.isfinite(np.exp(self._lim_val_exp))
assert np.isinf(np.exp(self._lim_val_exp + 1))
assert np.isfinite(GPy.util.misc.safe_exp(self._lim_val_exp + 1))
print(w)
print(len(w))
assert len(w)<=1 # should have one overflow warning
def test_safe_exp_lower(self): def test_safe_exp_lower(self):
assert GPy.util.misc.safe_exp(1e-10) < np.inf assert GPy.util.misc.safe_exp(1e-10) < np.inf

View file

@ -15,6 +15,13 @@ class MiscTests(unittest.TestCase):
self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05 self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
self.X_new = np.random.uniform(-3., 3., (self.N_new, 1)) self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
def test_setXY(self):
m = GPy.models.GPRegression(self.X, self.Y)
m.set_XY(np.vstack([self.X, np.random.rand(1,self.X.shape[1])]), np.vstack([self.Y, np.random.rand(1,self.Y.shape[1])]))
m._trigger_params_changed()
self.assertTrue(m.checkgrad())
m.predict(m.X)
def test_raw_predict(self): def test_raw_predict(self):
k = GPy.kern.RBF(1) k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(self.X, self.Y, kernel=k) m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
@ -36,12 +43,78 @@ class MiscTests(unittest.TestCase):
np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var) np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
np.testing.assert_almost_equal(mu_hat, mu) np.testing.assert_almost_equal(mu_hat, mu)
def test_normalizer(self):
k = GPy.kern.RBF(1)
Y = self.Y
mu, std = Y.mean(0), Y.std(0)
m = GPy.models.GPRegression(self.X, Y, kernel=k, normalizer=True)
m.optimize()
assert(m.checkgrad())
k = GPy.kern.RBF(1)
m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
m2[:] = m[:]
mu1, var1 = m.predict(m.X, full_cov=True)
mu2, var2 = m2.predict(m2.X, full_cov=True)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
mu1, var1 = m.predict(m.X, full_cov=False)
mu2, var2 = m2.predict(m2.X, full_cov=False)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
q50n = m.predict_quantiles(m.X, (50,))
q50 = m2.predict_quantiles(m2.X, (50,))
np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
def check_jacobian(self):
try:
import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt
from GPy.models import GradientChecker, GPRegression
except:
raise self.skipTest("autograd not available to check gradients")
def k(X, X2, alpha=1., lengthscale=None):
if lengthscale is None:
lengthscale = np.ones(X.shape[1])
exp = 0.
for q in range(X.shape[1]):
exp += ((X[:, [q]] - X2[:, [q]].T)/lengthscale[q])**2
#exp = np.sqrt(exp)
return alpha * np.exp(-.5*exp)
dk = ag.elementwise_grad(lambda x, x2: k(x, x2, alpha=ke.variance.values, lengthscale=ke.lengthscale.values))
dkdk = ag.elementwise_grad(dk, argnum=1)
ke = GPy.kern.RBF(1, ARD=True)
#ke.randomize()
ke.variance = .2#.randomize()
ke.lengthscale[:] = .5
ke.randomize()
X = np.linspace(-1, 1, 1000)[:,None]
X2 = np.array([[0.]]).T
np.testing.assert_allclose(ke.gradients_X([[1.]], X, X), dk(X, X))
np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X).sum(0), dkdk(X, X))
np.testing.assert_allclose(ke.gradients_X([[1.]], X, X2), dk(X, X2))
np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X2).sum(0), dkdk(X, X2))
m = GPRegression(self.X, self.Y)
def f(x):
m.X[:] = x
return m.log_likelihood()
def df(x):
m.X[:] = x
return m.kern.gradients_X(m.grad_dict['dL_dK'], X)
def ddf(x):
m.X[:] = x
return m.kern.gradients_XX(m.grad_dict['dL_dK'], X).sum(0)
gc = GradientChecker(f, df, self.X)
gc2 = GradientChecker(df, ddf, self.X)
assert(gc.checkgrad())
assert(gc2.checkgrad())
def test_sparse_raw_predict(self): def test_sparse_raw_predict(self):
k = GPy.kern.RBF(1) k = GPy.kern.RBF(1)
m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k) m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k)
m.randomize() m.randomize()
Z = m.Z[:] Z = m.Z[:]
X = self.X[:]
# Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression # Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
Kinv = m.posterior.woodbury_inv Kinv = m.posterior.woodbury_inv
@ -127,11 +200,24 @@ class MiscTests(unittest.TestCase):
m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing, m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True) kernel=k, missing_data=True)
assert(m.checkgrad()) assert(m.checkgrad())
mul, varl = m.predict(m.X)
k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q) k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing, m2 = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True) kernel=k, missing_data=True)
assert(m.checkgrad()) assert(m.checkgrad())
m2.kern.rbf.lengthscale[:] = 1e6
m2.X[:] = m.X.param_array
m2.likelihood[:] = m.likelihood[:]
m2.kern.white[:] = m.kern.white[:]
mu, var = m.predict(m.X)
np.testing.assert_allclose(mul, mu)
np.testing.assert_allclose(varl, var)
q50 = m.predict_quantiles(m.X, (50,))
np.testing.assert_allclose(mul, q50[0])
def test_likelihood_replicate_kern(self): def test_likelihood_replicate_kern(self):
m = GPy.models.GPRegression(self.X, self.Y) m = GPy.models.GPRegression(self.X, self.Y)
@ -410,8 +496,8 @@ class GradientTests(np.testing.TestCase):
self.check_model(rbf, model_type='SparseGPRegression', dimension=2) self.check_model(rbf, model_type='SparseGPRegression', dimension=2)
def test_SparseGPRegression_rbf_linear_white_kern_1D(self): def test_SparseGPRegression_rbf_linear_white_kern_1D(self):
''' Testing the sparse GP regression with rbf kernel on 2d data ''' ''' Testing the sparse GP regression with rbf kernel on 1d data '''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1) rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1) + GPy.kern.White(1, 1e-5)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=1) self.check_model(rbflin, model_type='SparseGPRegression', dimension=1)
def test_SparseGPRegression_rbf_linear_white_kern_2D(self): def test_SparseGPRegression_rbf_linear_white_kern_2D(self):
@ -419,14 +505,12 @@ class GradientTests(np.testing.TestCase):
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2) rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2) self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)
# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self): def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs''' ''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2) rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
raise unittest.SkipTest("This is not implemented yet!") raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1) self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self): def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs''' ''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1) rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
@ -443,6 +527,16 @@ class GradientTests(np.testing.TestCase):
m = GPy.models.GPLVM(Y, input_dim, kernel=k) m = GPy.models.GPLVM(Y, input_dim, kernel=k)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_BCGPLVM_rbf_bias_white_kern_2D(self):
""" Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.Bias(input_dim, 0.1) + GPy.kern.White(input_dim, 0.05)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.BCGPLVM(Y, input_dim, kernel=k)
self.assertTrue(m.checkgrad())
def test_GPLVM_rbf_linear_white_kern_2D(self): def test_GPLVM_rbf_linear_white_kern_2D(self):
""" Testing GPLVM with rbf + bias kernel """ """ Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2 N, input_dim, D = 50, 1, 2
@ -468,23 +562,8 @@ class GradientTests(np.testing.TestCase):
Z = np.linspace(0, 15, 4)[:, None] Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.RBF(1) kernel = GPy.kern.RBF(1)
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z) m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z)
# distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
# likelihood = GPy.likelihoods.EP(Y, distribution)
# m = GPy.core.SparseGP(X, likelihood, kernel, Z)
# m.ensure_default_constraints()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_generalized_FITC(self):
N = 20
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.RBF(1) + GPy.kern.White(1)
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
m = GPy.models.FITCClassification(X, Y, kernel=k)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_multioutput_regression_1D(self): def test_multioutput_regression_1D(self):
X1 = np.random.rand(50, 1) * 8 X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5 X2 = np.random.rand(30, 1) * 5
@ -494,12 +573,11 @@ class GradientTests(np.testing.TestCase):
Y = np.vstack((Y1, Y2)) Y = np.vstack((Y1, Y2))
k1 = GPy.kern.RBF(1) k1 = GPy.kern.RBF(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1]) m = GPy.models.GPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel=k1)
import ipdb;ipdb.set_trace() #import ipdb;ipdb.set_trace()
m.constrain_fixed('.*rbf_var', 1.) #m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_multioutput_sparse_regression_1D(self): def test_multioutput_sparse_regression_1D(self):
X1 = np.random.rand(500, 1) * 8 X1 = np.random.rand(500, 1) * 8
X2 = np.random.rand(300, 1) * 5 X2 = np.random.rand(300, 1) * 5
@ -509,8 +587,7 @@ class GradientTests(np.testing.TestCase):
Y = np.vstack((Y1, Y2)) Y = np.vstack((Y1, Y2))
k1 = GPy.kern.RBF(1) k1 = GPy.kern.RBF(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1]) m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel=k1)
m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_gp_heteroscedastic_regression(self): def test_gp_heteroscedastic_regression(self):
@ -539,6 +616,7 @@ class GradientTests(np.testing.TestCase):
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_gp_kronecker_gaussian(self): def test_gp_kronecker_gaussian(self):
np.random.seed(0)
N1, N2 = 30, 20 N1, N2 = 30, 20
X1 = np.random.randn(N1, 1) X1 = np.random.randn(N1, 1)
X2 = np.random.randn(N2, 1) X2 = np.random.randn(N2, 1)
@ -559,16 +637,16 @@ class GradientTests(np.testing.TestCase):
m.randomize() m.randomize()
mm[:] = m[:] mm[:] = m[:]
assert np.allclose(m.log_likelihood(), mm.log_likelihood()) self.assertTrue(np.allclose(m.log_likelihood(), mm.log_likelihood()))
assert np.allclose(m.gradient, mm.gradient) self.assertTrue(np.allclose(m.gradient, mm.gradient))
X1test = np.random.randn(100, 1) X1test = np.random.randn(100, 1)
X2test = np.random.randn(100, 1) X2test = np.random.randn(100, 1)
mean1, var1 = m.predict(X1test, X2test) mean1, var1 = m.predict(X1test, X2test)
yy, xx = np.meshgrid(X2test, X1test) yy, xx = np.meshgrid(X2test, X1test)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
mean2, var2 = mm.predict(Xgrid) mean2, var2 = mm.predict(Xgrid)
assert np.allclose(mean1, mean2) self.assertTrue( np.allclose(mean1, mean2) )
assert np.allclose(var1, var2) self.assertTrue( np.allclose(var1, var2) )
def test_gp_VGPC(self): def test_gp_VGPC(self):
num_obs = 25 num_obs = 25
@ -576,7 +654,8 @@ class GradientTests(np.testing.TestCase):
X = X[:, None] X = X[:, None]
Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None] Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1) kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern) lik = GPy.likelihoods.Gaussian()
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -248,10 +248,16 @@ class ParameterizedTest(unittest.TestCase):
m.randomize() m.randomize()
self.assertEqual(m.p1, val) self.assertEqual(m.p1, val)
def test_checkgrad(self):
assert(self.testmodel.kern.checkgrad())
assert(self.testmodel.kern.lengthscale.checkgrad())
assert(self.testmodel.likelihood.checkgrad())
def test_printing(self): def test_printing(self):
print(self.test1) print(self.test1)
print(self.param) print(self.param)
print(self.test1['']) print(self.test1[''])
print(self.testmodel.hierarchy_name(False))
if __name__ == "__main__": if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_add_parameter'] #import sys;sys.argv = ['', 'Test.test_add_parameter']

View file

@ -20,6 +20,8 @@ from GPy.examples.dimensionality_reduction import mrd_simulation
from GPy.core.parameterization.variational import NormalPosterior from GPy.core.parameterization.variational import NormalPosterior
from GPy.models.gp_regression import GPRegression from GPy.models.gp_regression import GPRegression
from functools import reduce from functools import reduce
from GPy.util.caching import Cacher
from pickle import PicklingError
def toy_model(): def toy_model():
X = np.linspace(0,1,50)[:, None] X = np.linspace(0,1,50)[:, None]
@ -205,23 +207,6 @@ class Test(ListDictTestCase):
def _callback(self, what, which): def _callback(self, what, which):
what.count += 1 what.count += 1
@unittest.skip
def test_add_observer(self):
par = toy_model()
par.name = "original"
par.count = 0
par.add_observer(self, self._callback, 1)
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
self.assertNotIn(par.observers[0], pcopy.observers)
pcopy = par.copy()
pcopy.name = "copy"
self.assertTrue(par.checkgrad())
self.assertTrue(pcopy.checkgrad())
self.assertTrue(pcopy.kern.checkgrad())
import ipdb;ipdb.set_trace()
self.assertIn(par.observers[0], pcopy.observers)
self.assertEqual(par.count, 3)
self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad
if __name__ == "__main__": if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_parameter_index_operations'] #import sys;sys.argv = ['', 'Test.test_parameter_index_operations']

1
GPy/testing/run_coverage.sh Executable file
View file

@ -0,0 +1 @@
nosetests . --with-coverage --logging-level=INFO --cover-html --cover-html-dir=coverage --cover-package=GPy --cover-erase

Some files were not shown because too many files have changed in this diff Show more