Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Max Zwiessele 2015-08-28 16:28:25 +01:00
commit 938cc49aed
83 changed files with 35983 additions and 4358 deletions

View file

@ -60,9 +60,11 @@ class GP(Model):
self.normalizer.scale_by(Y)
self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
self.Y = Y
else:
elif isinstance(Y, np.ndarray):
self.Y = ObsAr(Y)
self.Y_normalized = self.Y
else:
self.Y = Y
if Y.shape[0] != self.num_data:
#There can be cases where we want inputs than outputs, for example if we have multiple latent
@ -89,7 +91,6 @@ class GP(Model):
assert mean_function.output_dim == self.output_dim
self.link_parameter(mean_function)
#find a sensible inference method
logger.info("initializing inference method")
if inference_method is None:
@ -182,7 +183,7 @@ class GP(Model):
"""
return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, full_cov=False, kern=None):
def _raw_predict(self, Xnew, full_cov=False, kern=None):
"""
For making predictions, does not account for normalization or likelihood
@ -198,23 +199,30 @@ class GP(Model):
if kern is None:
kern = self.kern
Kx = kern.K(_Xnew, self.X).T
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
Kx = kern.K(self.X, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = kern.K(_Xnew)
var = Kxx - np.dot(Kx.T, WiKx)
Kxx = kern.K(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(_Xnew)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
Kxx = kern.Kdiag(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]
#add the mean function in
if not self.mean_function is None:
mu += self.mean_function.f(_Xnew)
return mu, var
def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@ -244,10 +252,10 @@ class GP(Model):
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
return mean, var
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, kern=None):
"""
Get the predictive quantiles around the prediction at X
@ -255,13 +263,15 @@ class GP(Model):
:type X: np.ndarray (Xnew x self.input_dim)
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
:type quantiles: tuple
:param kern: optional kernel to use for prediction
:type predict_kw: dict
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
m, v = self._raw_predict(X, full_cov=False, kern=kern)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
def predictive_gradients(self, Xnew):
"""
@ -331,7 +341,7 @@ class GP(Model):
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
Ysim = self.likelihood.samples(fsim, Y_metadata)
Ysim = self.likelihood.samples(fsim, Y_metadata=Y_metadata)
return Ysim
def plot_f(self, plot_limits=None, which_data_rows='all',
@ -473,16 +483,16 @@ class GP(Model):
self.inference_method.on_optimization_end()
raise
def infer_newX(self, Y_new, optimize=True, ):
def infer_newX(self, Y_new, optimize=True):
"""
Infer the distribution of X for the new observed data *Y_new*.
Infer X for the new observed data *Y_new*.
:param Y_new: the new observed data for inference
:type Y_new: numpy.ndarray
:param optimize: whether to optimize the location of new X (True by default)
:type optimize: boolean
:return: a tuple containing the posterior estimation of X and the model that optimize X
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` and numpy.ndarray, :class:`~GPy.core.model.Model`)
"""
from ..inference.latent_function_inference.inferenceX import infer_newX
return infer_newX(self, Y_new, optimize=optimize)

View file

@ -76,7 +76,7 @@ class Model(Parameterized):
jobs = []
pool = mp.Pool(processes=num_processes)
for i in range(num_restarts):
self.randomize()
if i>0: self.randomize()
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job)
@ -90,7 +90,7 @@ class Model(Parameterized):
for i in range(num_restarts):
try:
if not parallel:
self.randomize()
if i>0: self.randomize()
self.optimize(**kwargs)
else:
self.optimization_runs.append(jobs[i].get())
@ -257,7 +257,7 @@ class Model(Parameterized):
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, max_iters=max_iters, **kwargs)
with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook) as vo:
with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook, clear_after_finish=clear_after_finish) as vo:
opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
vo.finish(opt)

View file

@ -5,7 +5,7 @@ import numpy
from numpy.lib.function_base import vectorize
from .lists_and_dicts import IntArrayDict
from functools import reduce
from transformations import Transformation
from .transformations import Transformation
def extract_properties_to_index(index, props):
prop_index = dict()
@ -109,7 +109,7 @@ class ParameterIndexOperations(object):
try:
return self._properties.itervalues()
except AttributeError:
#Changed this from itervalues to values for Py3 compatibility. It didn't break the test suite.
#Changed this from itervalues to values for Py3 compatibility. It didn't break the test suite.
return self._properties.values()
def indices(self):

View file

@ -38,6 +38,11 @@ class Param(Parameterizable, ObsAr):
Fixing parameters will fix them to the value they are right now. If you change
the fixed value, it will be fixed to the new value!
Important Note:
Multilevel indexing (e.g. self[:2][1:]) is not supported and might lead to unexpected behaviour.
Try to index in one go, using boolean indexing or the numpy builtin
np.index function.
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
"""

View file

@ -430,23 +430,38 @@ class Indexable(Nameable, Updateable):
def log_prior(self):
"""evaluate the prior"""
if self.priors.size > 0:
x = self.param_array
#py3 fix
#return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
return 0.
if self.priors.size == 0:
return 0.
x = self.param_array
#evaluate the prior log densities
log_p = reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
#account for the transformation by evaluating the log Jacobian (where things are transformed)
log_j = 0.
priored_indexes = np.hstack([i for p, i in self.priors.items()])
for c,j in self.constraints.items():
if not isinstance(c, Transformation):continue
for jj in j:
if jj in priored_indexes:
log_j += c.log_jacobian(x[jj])
return log_p + log_j
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
if self.priors.size > 0:
x = self.param_array
ret = np.zeros(x.size)
#py3 fix
#[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
return ret
return 0.
if self.priors.size == 0:
return 0.
x = self.param_array
ret = np.zeros(x.size)
#compute derivate of prior density
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
#add in jacobian derivatives if transformed
priored_indexes = np.hstack([i for p, i in self.priors.items()])
for c,j in self.constraints.items():
if not isinstance(c, Transformation):continue
for jj in j:
if jj in priored_indexes:
ret[jj] += c.log_jacobian_grad(x[jj])
return ret
#===========================================================================
# Tie parameters together

View file

@ -6,10 +6,10 @@ import numpy; np = numpy
import itertools
from re import compile, _pattern_type
from .param import ParamConcatenation
from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging
from index_operations import ParameterIndexOperationsView
from .index_operations import ParameterIndexOperationsView
logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type):
@ -197,9 +197,10 @@ class Parameterized(Parameterizable):
raise RuntimeError("{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param)))
start = sum([p.size for p in self.parameters[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size
del self.parameters[param._parent_index_]
self._remove_parameter_name(param)
param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers)

View file

@ -522,16 +522,9 @@ class DGPLVM(Prior):
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __new__(cls, sigma2, lbl, x_shape):
return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
def __init__(self, sigma2, lbl, x_shape):
self.sigma2 = sigma2
@ -758,12 +751,12 @@ class DGPLVM_Lamda(Prior, Parameterized):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.lamda = lamda
self.lamda = lamda
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
self.lamda = Param('lamda', np.diag(lamda))
self.lamda = Param('lamda', np.diag(lamda))
self.link_parameter(self.lamda)
def get_class_label(self, y):
@ -789,7 +782,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
class_i = cls[i]
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
@ -843,7 +836,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
import pdb
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
@ -899,8 +892,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@ -909,15 +902,15 @@ class DGPLVM_Lamda(Prior, Parameterized):
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@ -933,8 +926,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
@ -951,14 +944,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
DPxprim_Dx = DPxprim_Dx.T
DPxprim_Dlamda = DPx_Dx.dot(x)
DPxprim_Dlamda = DPx_Dx.dot(x)
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
DPxprim_Dlamda = DPxprim_Dlamda.T
DPxprim_Dlamda = DPxprim_Dlamda.T
self.lamda.gradient = np.diag(DPxprim_Dlamda)
self.lamda.gradient = np.diag(DPxprim_Dlamda)
# print DPxprim_Dx
return DPxprim_Dx
return DPxprim_Dx
# def frb(self, x):
@ -1139,8 +1132,8 @@ class DGPLVM_T(Prior):
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@ -1156,11 +1149,11 @@ class DGPLVM_T(Prior):
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)

View file

@ -31,6 +31,16 @@ class Transformation(object):
raise NotImplementedError
def finv(self, model_param):
raise NotImplementedError
def log_jacobian(self, model_param):
"""
compute the log of the jacobian of f, evaluated at f(x)= model_param
"""
raise NotImplementedError
def log_jacobian_grad(self, model_param):
"""
compute the drivative of the log of the jacobian of f, evaluated at f(x)= model_param
"""
raise NotImplementedError
def gradfactor(self, model_param, dL_dmodel_param):
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
@ -74,9 +84,33 @@ class Logexp(Transformation):
if np.any(f < 0.):
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def log_jacobian(self, model_param):
return np.where(model_param>_lim_val, model_param, np.log(np.exp(model_param+1e-20) - 1.)) - model_param
def log_jacobian_grad(self, model_param):
return 1./(np.exp(model_param)-1.)
def __str__(self):
return '+ve'
class Exponent(Transformation):
domain = _POSITIVE
def f(self, x):
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
if np.any(f < 0.):
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def log_jacobian(self, model_param):
return np.log(model_param)
def log_jacobian_grad(self, model_param):
return 1./model_param
def __str__(self):
return '+ve'
class NormalTheta(Transformation):
"Do not use, not officially supported!"
@ -417,22 +451,6 @@ class LogexpClipped(Logexp):
def __str__(self):
return '+ve_c'
class Exponent(Transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
domain = _POSITIVE
def f(self, x):
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
if np.any(f < 0.):
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def __str__(self):
return '+ve'
class NegativeExponent(Exponent):
domain = _NEGATIVE
def f(self, x):

View file

@ -36,8 +36,9 @@ class NormalPrior(VariationalPrior):
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
class SpikeAndSlabPrior(VariationalPrior):
def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw):
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
def __init__(self, pi=None, learnPi=False, variance = 1.0, group_spike=False, name='SpikeAndSlabPrior', **kw):
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
self.group_spike = group_spike
self.variance = Param('variance',variance)
self.learnPi = learnPi
if learnPi:
@ -50,7 +51,10 @@ class SpikeAndSlabPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.gamma.values
if self.group_spike:
gamma = variational_posterior.gamma.values[0]
else:
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
@ -65,14 +69,21 @@ class SpikeAndSlabPrior(VariationalPrior):
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.gamma.values
if self.group_spike:
gamma = variational_posterior.gamma.values[0]
else:
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
if self.group_spike:
dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))/variational_posterior.num_data
else:
dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi:
@ -150,17 +161,45 @@ class NormalPosterior(VariationalPosterior):
from ...plotting.matplot_dep import variational_plots
return variational_plots.plot(self, *args, **kwargs)
def KL(self, other):
"""Compute the KL divergence to another NormalPosterior Object. This only holds, if the two NormalPosterior objects have the same shape, as we do computational tricks for the multivariate normal KL divergence.
"""
return .5*(
np.sum(self.variance/other.variance)
+ ((other.mean-self.mean)**2/other.variance).sum()
- self.num_data * self.input_dim
+ np.sum(np.log(other.variance)) - np.sum(np.log(self.variance))
)
class SpikeAndSlabPosterior(VariationalPosterior):
'''
The SpikeAndSlab distribution for variational approximations.
'''
def __init__(self, means, variances, binary_prob, name='latent space'):
def __init__(self, means, variances, binary_prob, group_spike=False, sharedX=False, name='latent space'):
"""
binary_prob : the probability of the distribution on the slab part.
"""
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
self.link_parameter(self.gamma)
self.group_spike = group_spike
self.sharedX = sharedX
if sharedX:
self.mean.fix(warning=False)
self.variance.fix(warning=False)
if group_spike:
self.gamma_group = Param("binary_prob_group",binary_prob.mean(axis=0),Logistic(1e-10,1.-1e-10))
self.gamma = Param("binary_prob",binary_prob, __fixed__)
self.link_parameters(self.gamma_group,self.gamma)
else:
self.gamma = Param("binary_prob",binary_prob,Logistic(1e-10,1.-1e-10))
self.link_parameter(self.gamma)
def propogate_val(self):
if self.group_spike:
self.gamma.values[:] = self.gamma_group.values
def collate_gradient(self):
if self.group_spike:
self.gamma_group.gradient = self.gamma.gradient.reshape(self.gamma.shape).sum(axis=0)
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
@ -179,15 +218,15 @@ class SpikeAndSlabPosterior(VariationalPosterior):
n.parameters[dc['variance']._parent_index_] = dc['variance']
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size
n.size = n.mean.size + n.variance.size + oversize
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size
n.size = n.mean.size + n.variance.size + n.gamma.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(VariationalPrior, self).__getitem__(s)
return super(SpikeAndSlabPosterior, self).__getitem__(s)
def plot(self, *args, **kwargs):
"""

View file

@ -133,7 +133,7 @@ class SparseGP(GP):
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:

View file

@ -34,7 +34,7 @@ class SparseGP_MPI(SparseGP):
"""
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False):
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp', Y_metadata=None, mpi_comm=None, normalizer=False):
self._IN_OPTIMIZATION_ = False
if mpi_comm != None:
if inference_method is None:

View file

@ -5,7 +5,7 @@ import numpy as np
from ..util import choleskies
from .sparse_gp import SparseGP
from .parameterization.param import Param
from ..inference.latent_function_inference import SVGP as svgp_inf
from ..inference.latent_function_inference.svgp import SVGP as svgp_inf
class SVGP(SparseGP):
@ -46,7 +46,7 @@ class SVGP(SparseGP):
num_latent_functions = Y.shape[1]
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,num_latent_functions)))
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[None,:,:], (num_latent_functions, 1,1)))
self.chol = Param('q_u_chol', chol)
self.link_parameter(self.chol)
self.link_parameter(self.m)

View file

@ -217,9 +217,8 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
elif model_type == 'FITC':
m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
m['.*len'] = 3.
if optimize:
m.pseudo_EM()
m.optimize()
if plot:
m.plot()

View file

@ -215,6 +215,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
return m
def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
"""Simulate some data drawn from a matern covariance and a periodic exponential for use in MRD demos."""
Q_signal = 4
import GPy
import numpy as np
@ -254,6 +255,7 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
return slist, [S1, S2, S3], Ylist
def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
"""Simulate some data drawn from sine and cosine for use in demos of MRD"""
_np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None]
@ -353,13 +355,13 @@ def ssgplvm_simulation(optimize=True, verbose=1,
Y = Ylist[0]
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k)
m = SSGPLVM(Y, Q, init="rand", num_inducing=num_inducing, kernel=k, group_spike=True)
m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
m.likelihood.variance = .1
m.likelihood.variance = .01
if optimize:
print("Optimizing model:")
m.optimize('scg', messages=verbose, max_iters=max_iters,
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05)
if plot:
m.X.plot("SSGPLVM Latent Space 1D")
@ -402,7 +404,8 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy.models import MRD
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim)
# Ylist = [Ylist[0]]
k = kern.Linear(Q, ARD=True)
@ -585,6 +588,7 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
return m
def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
"""Interactive visualisation of the Stick Man data from Ohio State University with the Bayesian GPLVM."""
from GPy.models import BayesianGPLVM
from matplotlib import pyplot as plt
import numpy as np
@ -613,7 +617,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
fig.canvas.draw()
fig.canvas.show()
# Canvas.show doesn't work on OSX.
#fig.canvas.show()
raw_input('Press enter to finish')
return m

View file

@ -69,7 +69,7 @@ from .expectation_propagation_dtc import EPDTC
from .dtc import DTC
from .fitc import FITC
from .var_dtc_parallel import VarDTC_minibatch
from .svgp import SVGP
from .var_gauss import VarGauss
# class FullLatentFunctionData(object):
#

View file

@ -4,6 +4,7 @@
import numpy as np
from ...core import Model
from ...core.parameterization import variational
from GPy.core.parameterization.variational import VariationalPosterior
def infer_newX(model, Y_new, optimize=True, init='L2'):
"""
@ -27,12 +28,19 @@ def infer_newX(model, Y_new, optimize=True, init='L2'):
class InferenceX(Model):
"""
The class for inference of new X with given new Y. (do_test_latent)
The model class for inference of new X with given new Y. (replacing the "do_test_latent" in Bayesian GPLVM)
It is a tiny inference model created from the original GP model. The kernel, likelihood (only Gaussian is supported at the moment)
and posterior distribution are taken from the original model.
For Regression models and GPLVM, a point estimate of the latent variable X will be inferred.
For Bayesian GPLVM, the variational posterior of X will be inferred.
X is inferred through a gradient optimization of the inference model.
:param model: the GPy model used in inference
:type model: GPy.core.Model
:param Y: the new observed data for inference
:type Y: numpy.ndarray
:param init: the distance metric of Y for initializing X with the nearest neighbour.
:type init: 'L2', 'NCC' and 'rand'
"""
def __init__(self, model, Y, name='inferenceX', init='L2'):
if np.isnan(Y).any() or getattr(model, 'missing_data', False):
@ -45,20 +53,27 @@ class InferenceX(Model):
super(InferenceX, self).__init__(name)
self.likelihood = model.likelihood.copy()
self.kern = model.kern.copy()
if model.kern.useGPU:
from ...models import SSGPLVM
if isinstance(model, SSGPLVM):
self.kern.GPU_SSRBF(True)
else:
self.kern.GPU(True)
# if model.kern.useGPU:
# from ...models import SSGPLVM
# if isinstance(model, SSGPLVM):
# self.kern.GPU_SSRBF(True)
# else:
# self.kern.GPU(True)
from copy import deepcopy
self.posterior = deepcopy(model.posterior)
if hasattr(model, 'variational_prior'):
from ...core.parameterization.variational import VariationalPosterior
if isinstance(model.X, VariationalPosterior):
self.uncertain_input = True
self.variational_prior = model.variational_prior.copy()
from ...models.ss_gplvm import IBPPrior
from ...models.ss_mrd import IBPPrior_SSMRD
if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
from ...core.parameterization.variational import SpikeAndSlabPrior
self.variational_prior = SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
else:
self.variational_prior = model.variational_prior.copy()
else:
self.uncertain_input = False
if hasattr(model, 'inducing_inputs'):
if hasattr(model, 'Z'):
self.sparse_gp = True
self.Z = model.Z.copy()
else:
@ -147,9 +162,9 @@ class InferenceX(Model):
from ...core.parameterization.variational import SpikeAndSlabPrior
if isinstance(self.variational_prior, SpikeAndSlabPrior):
# Update Log-likelihood
KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0])
KL_div = self.variational_prior.KL_divergence(self.X)
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0])
self.variational_prior.update_gradients_KL(self.X)
else:
# Update Log-likelihood
KL_div = self.variational_prior.KL_divergence(self.X)

View file

@ -139,10 +139,6 @@ class Laplace(LatentFunctionInference):
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat
#self.Ki_fhat = Ki_fhat
#self.K = K.copy()
#Compute hessian and other variables at mode
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
@ -175,7 +171,9 @@ class Laplace(LatentFunctionInference):
#define the objective function (to be maximised)
def obj(Ki_f, f):
ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
print ll
if np.isnan(ll):
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return -np.inf
else:
return ll
@ -298,6 +296,11 @@ class Laplace(LatentFunctionInference):
else:
dL_dthetaL = np.zeros(likelihood.size)
#Cache some things for speedy LOO
self.Ki_W_i = Ki_W_i
self.K = K
self.W = W
self.f_hat = f_hat
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):

View file

@ -3,6 +3,7 @@ from ...util import linalg
from ...util import choleskies
import numpy as np
from .posterior import Posterior
from scipy.linalg.blas import dgemm, dsymm, dtrmm
class SVGP(LatentFunctionInference):
@ -16,16 +17,13 @@ class SVGP(LatentFunctionInference):
S = np.empty((num_outputs, num_inducing, num_inducing))
[np.dot(L[:,:,i], L[:,:,i].T, S[i,:,:]) for i in range(num_outputs)]
S = S.swapaxes(0,2)
[np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:]) for i in range(num_outputs)]
#Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
Si = choleskies.multiple_dpotri(L)
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])
if np.any(np.isinf(Si)):
raise ValueError("Cholesky representation unstable")
#S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
#Si, Lnew, _,_ = linalg.pdinv(S)
#compute mean function stuff
if mean_function is not None:
@ -35,27 +33,31 @@ class SVGP(LatentFunctionInference):
prior_mean_u = np.zeros((num_inducing, num_outputs))
prior_mean_f = np.zeros((num_data, num_outputs))
#compute kernel related stuff
Kmm = kern.K(Z)
Knm = kern.K(X, Z)
Kmn = kern.K(Z, X)
Knn_diag = kern.Kdiag(X)
Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
Lm = linalg.jitchol(Kmm)
logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
Kmmi, _ = linalg.dpotri(Lm)
#compute the marginal means and variances of q(f)
A = np.dot(Knm, Kmmi)
mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
#v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jlk->ilk', A, S),1)
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S),1)
A, _ = linalg.dpotrs(Lm, Kmn)
mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
v = np.empty((num_data, num_outputs))
for i in range(num_outputs):
tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
v[:,i] = np.sum(np.square(tmp),0)
v += (Knn_diag - np.sum(A*Kmn,0))[:,None]
#compute the KL term
Kmmim = np.dot(Kmmi, q_u_mean)
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[:,:,None]*S,0).sum(0) + 0.5*np.sum(q_u_mean*Kmmim,0)
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
KL = KLs.sum()
#gradient of the KL term (assuming zero mean function)
dKL_dm = Kmmim.copy()
dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
dKL_dS = 0.5*(Kmmi[None,:,:] - Si)
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(0)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
if mean_function is not None:
#adjust KL term for mean function
@ -80,17 +82,20 @@ class SVGP(LatentFunctionInference):
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale
#derivatives of expected likelihood, assuming zero mean function
Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
Admu = A.T.dot(dF_dmu)
AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
#tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
tmp = linalg.ijk_jlk_to_il(AdvA, S).dot(Kmmi)
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
Adv = A[None,:,:]*dF_dv.T[:,None,:] # As if dF_Dv is diagonal, D, M, N
Admu = A.dot(dF_dmu)
Adv = np.ascontiguousarray(Adv) # makes for faster operations later...(inc dsymm)
AdvA = np.dot(Adv.reshape(-1, num_data),A.T).reshape(num_outputs, num_inducing, num_inducing )
tmp = np.sum([np.dot(a,s) for a, s in zip(AdvA, S)],0).dot(Kmmi)
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(0) - tmp - tmp.T
dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
#tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
tmp = 2.*(linalg.ij_jlk_to_ilk(Kmmi, S) - np.eye(num_inducing)[:,:,None])
#dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
dF_dKmn = linalg.ijk_jlk_to_il(tmp, Adv) + Kmmim.dot(dF_dmu.T)
tmp = S.reshape(-1, num_inducing).dot(Kmmi).reshape(num_outputs, num_inducing , num_inducing )
tmp = 2.*(tmp - np.eye(num_inducing)[None, :,:])
dF_dKmn = Kmmim.dot(dF_dmu.T)
for a,b in zip(tmp, Adv):
dF_dKmn += np.dot(a.T, b)
dF_dm = Admu
dF_dS = AdvA
@ -106,11 +111,11 @@ class SVGP(LatentFunctionInference):
log_marginal = F.sum() - KL
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
dL_dchol = 2.*np.array([np.dot(a,b) for a, b in zip(dL_dS, L) ])
dL_dchol = choleskies.triang_to_flat(dL_dchol)
grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
if mean_function is not None:
grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
grad_dict['dL_dmfX'] = dF_dmfX
return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
return Posterior(mean=q_u_mean, cov=S.T, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict

View file

@ -172,18 +172,23 @@ class VarDTC_minibatch(LatentFunctionInference):
if not np.isfinite(Kmm).all():
print(Kmm)
Lm = jitchol(Kmm)
LmInv = dtrtri(Lm)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
LmInvPsi2LmInvT = LmInv.dot(psi2_full.dot(LmInv.T))
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda)
LLInv = dtrtri(LL)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
LmLLInv = LLInv.dot(LmInv)
b = psi1Y_full.dot(LmLLInv.T)
bbt = np.square(b).sum()
v = dtrtrs(Lm,dtrtrs(LL,b,trans=1)[0],trans=1)[0]
tmp = -backsub_both_sides(LL, tdot(b)+output_dim*np.eye(input_dim), transpose='left')
dL_dpsi2R = backsub_both_sides(Lm, tmp+output_dim*np.eye(input_dim), transpose='left')/2.
v = b.dot(LmLLInv).T
LLinvPsi1TYYTPsi1LLinvT = tdot(b.T)
tmp = -LLInv.T.dot(LLinvPsi1TYYTPsi1LLinvT+output_dim*np.eye(input_dim)).dot(LLInv)
dL_dpsi2R = LmInv.T.dot(tmp+output_dim*np.eye(input_dim)).dot(LmInv)/2.
# Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v
@ -201,7 +206,7 @@ class VarDTC_minibatch(LatentFunctionInference):
# Compute dL_dKmm
#======================================================================
dL_dKmm = dL_dpsi2R - output_dim*backsub_both_sides(Lm, LmInvPsi2LmInvT, transpose='left')/2.
dL_dKmm = dL_dpsi2R - output_dim*LmInv.T.dot(LmInvPsi2LmInvT).dot(LmInv)/2.
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)

View file

@ -0,0 +1,66 @@
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv
from .posterior import Posterior
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class VarGauss(LatentFunctionInference):
"""
The Variational Gaussian Approximation revisited
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
author = {Opper, Manfred and Archambeau, C{\'e}dric},
journal = {Neural Comput.},
year = {2009},
pages = {786--792},
}
"""
def __init__(self, alpha, beta):
"""
:param alpha: GPy.core.Param varational parameter
:param beta: GPy.core.Param varational parameter
"""
self.alpha, self.beta = alpha, beta
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
if mean_function is not None:
raise NotImplementedError
num_data, output_dim = Y.shape
assert output_dim ==1, "Only one output supported"
K = kern.K(X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, m, var, Y_metadata=Y_metadata)
if dF_dthetaL is not None:
dL_dthetaL = dF_dthetaL.sum(1).sum(1)
else:
dL_dthetaL = np.array([])
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
log_marginal = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
return Posterior(mean=m, cov=Sigma ,K=K),\
log_marginal,\
{'dL_dK':dF_dK-dKL_dK, 'dL_dthetaL':dL_dthetaL}

View file

@ -142,7 +142,7 @@ class opt_lbfgsb(Optimizer):
#a more helpful error message is available in opt_result in the Error case
if opt_result[2]['warnflag']==2:
self.status = 'Error' + opt_result[2]['task']
self.status = 'Error' + str(opt_result[2]['task'])
class opt_simplex(Optimizer):
def __init__(self, *args, **kwargs):

View file

@ -5,6 +5,10 @@ class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
self.d has to be a list of lists:
[dimension indices, nan indices for those dimensions]
so that the minibatches can be used as efficiently as possible.10
'''
def __init__(self, model):
"""
@ -28,9 +32,23 @@ class SparseGPMissing(StochasticStorage):
"""
Here we want to loop over all dimensions everytime.
Thus, we can just make sure the loop goes over self.d every
time.
time. We will try to get batches which look the same together
which speeds up calculations significantly.
"""
self.d = range(model.Y_normalized.shape[1])
import numpy as np
self.Y = model.Y_normalized
bdict = {}
for d in range(self.Y.shape[1]):
inan = np.isnan(self.Y[:, d])
arr_str = np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'})
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
self.d = bdict.values()
class SparseGPStochastics(StochasticStorage):
"""
@ -40,16 +58,29 @@ class SparseGPStochastics(StochasticStorage):
def __init__(self, model, batchsize=1):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.Y = model.Y_normalized
self.reset()
self.do_stochastics()
def do_stochastics(self):
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [self.current_dim]
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.d])]]
else:
import numpy as np
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
bdict = {}
for d in self.d:
inan = np.isnan(self.Y[:, d])
arr_str = int(np.array2string(inan,
np.inf, 0,
True, '',
formatter={'bool':lambda x: '1' if x else '0'}), 2)
try:
bdict[arr_str][0].append(d)
except:
bdict[arr_str] = [[d], ~inan]
self.d = bdict.values()
def reset(self):
self.current_dim = -1

View file

@ -6,6 +6,7 @@ from ._src.brownian import Brownian
from ._src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from ._src.mlp import MLP
from ._src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from ._src.standard_periodic import StdPeriodic
from ._src.independent_outputs import IndependentOutputs, Hierarchical
from ._src.coregionalize import Coregionalize
from ._src.ODE_UY import ODE_UY
@ -17,7 +18,7 @@ from ._src.eq_ode2 import EQ_ODE2
from ._src.trunclinear import TruncLinear,TruncLinear_inf
from ._src.splitKern import SplitKern,DEtime
from ._src.splitKern import DEtime as DiffGenomeKern
from _src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
from ._src.spline import Spline
from ._src.eq_ode2 import EQ_ODE2
from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

View file

@ -6,7 +6,7 @@ import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assesing whether to use cython
import coregionalize_cython
from . import coregionalize_cython
class Coregionalize(Kern):
"""
@ -94,7 +94,7 @@ class Coregionalize(Kern):
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small)
dkappa = np.diag(dL_dK_small).copy()
dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)

File diff suppressed because it is too large Load diff

View file

@ -1,33 +1,37 @@
#cython: boundscheck=True
#cython: wraparound=True
#cython: boundscheck=False
#cython: wraparound=False
#cython: nonecheck=False
import cython
import numpy as np
cimport numpy as np
def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
cdef int N = X.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
for n in range(N):
for m in range(N):
K[n,m] = B[X[n],X[m]]
cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, N))
with nogil:
for n in range(N):
for m in range(N):
K[n, m] = B[X[n], X[m]]
return K
def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
cdef int N = X.size
cdef int M = X2.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
for n in range(N):
for m in range(M):
K[n,m] = B[X[n],X2[m]]
cdef np.ndarray[np.double_t, ndim=2, mode='c'] K = np.empty((N, M))
with nogil:
for n in range(N):
for m in range(M):
K[n, m] = B[X[n], X2[m]]
return K
def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
cdef np.ndarray[np.double_t, ndim=2, mode='c'] dL_dK_small = np.zeros((D, D))
cdef int N = index.size
cdef int M = index2.size
for i in range(N):
for j in range(M):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
with nogil:
for i in range(N):
for j in range(M):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
return dL_dK_small

View file

@ -105,7 +105,7 @@ class IndependentOutputs(CombinationKernel):
if X2 is None:
# TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out
print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))

View file

@ -59,6 +59,9 @@ class Kern(Parameterized):
self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
from .psi_comp import PSICOMP_GH
self.psicomp = PSICOMP_GH()
@property
def return_psi2_n(self):
@ -90,11 +93,11 @@ class Kern(Parameterized):
def Kdiag(self, X):
raise NotImplementedError
def psi0(self, Z, variational_posterior):
raise NotImplementedError
return self.psicomp.psicomputations(self, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
raise NotImplementedError
return self.psicomp.psicomputations(self, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
raise NotImplementedError
return self.psicomp.psicomputations(self, Z, variational_posterior)[2]
def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X):
@ -119,21 +122,22 @@ class Kern(Parameterized):
dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i}
"""
raise NotImplementedError
dtheta = self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[0]
self.gradient[:] = dtheta
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables.
"""
raise NotImplementedError
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[1]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Compute the gradients wrt the parameters of the variational
distruibution q(X), chain-ruling via the expectations of the kernel
"""
raise NotImplementedError
return self.psicomp.psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)[2:]
def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
"""

View file

@ -78,7 +78,7 @@ class MLP(Kern):
*((vec1[:, None]+vec2[None, :])*self.weight_variance
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
def update_gradients_diag(self, X):
def update_gradients_diag(self, dL_dKdiag, X):
self._K_diag_computations(X)
self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)

View file

@ -8,9 +8,10 @@ from . import rbf_psi_comp
from . import ssrbf_psi_comp
from . import sslinear_psi_comp
from . import linear_psi_comp
from .gaussherm import PSICOMP_GH
class PSICOMP_RBF(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
@Cache_this(limit=10, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
@ -19,7 +20,7 @@ class PSICOMP_RBF(Pickleable):
else:
raise ValueError("unknown distriubtion received for psi-statistics")
@Cache_this(limit=2, ignore_args=(0,1,2,3))
@Cache_this(limit=10, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
@ -33,7 +34,7 @@ class PSICOMP_RBF(Pickleable):
class PSICOMP_Linear(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
@Cache_this(limit=10, ignore_args=(0,))
def psicomputations(self, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psicomputations(variance, Z, variational_posterior)
@ -42,7 +43,7 @@ class PSICOMP_Linear(Pickleable):
else:
raise ValueError("unknown distriubtion received for psi-statistics")
@Cache_this(limit=2, ignore_args=(0,1,2,3))
@Cache_this(limit=10, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)

View file

@ -0,0 +1,92 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
An approximated psi-statistics implementation based on Gauss-Hermite Quadrature
"""
import numpy as np
from GPy.util.caching import Cache_this
from ....util.linalg import tdot
from ....core.parameterization.parameter_core import Pickleable
class PSICOMP_GH(Pickleable):
def __init__(self, degree=5, cache_K=True):
self.degree = degree
self.cache_K = cache_K
self.locs, self.weights = np.polynomial.hermite.hermgauss(degree)
self.locs *= np.sqrt(2.)
self.weights*= 1./np.sqrt(np.pi)
self.Xs = None
def _setup_observers(self):
pass
@Cache_this(limit=10, ignore_args=(0,))
def comp_K(self, Z, qX):
if self.Xs is None or self.Xs.shape != qX.mean.shape:
from ....core.parameterization import ObsAr
self.Xs = ObsAr(np.empty((self.degree,)+qX.mean.shape))
mu, S = qX.mean.values, qX.variance.values
S_sq = np.sqrt(S)
for i in xrange(self.degree):
self.Xs[i] = self.locs[i]*S_sq+mu
return self.Xs
@Cache_this(limit=10, ignore_args=(0,))
def psicomputations(self, kern, Z, qX):
mu, S = qX.mean.values, qX.variance.values
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
if self.cache_K: Xs = self.comp_K(Z, qX)
else: S_sq = np.sqrt(S)
psi0 = np.zeros((N,))
psi1 = np.zeros((N,M))
psi2 = np.zeros((M,M))
for i in xrange(self.degree):
if self.cache_K:
X = Xs[i]
else:
X = self.locs[i]*S_sq+mu
psi0 += self.weights[i]* kern.Kdiag(X)
Kfu = kern.K(X,Z)
psi1 += self.weights[i]* Kfu
psi2 += self.weights[i]* tdot(Kfu.T)
return psi0, psi1, psi2
@Cache_this(limit=10, ignore_args=(0, 2,3,4))
def psiDerivativecomputations(self, kern, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, qX):
mu, S = qX.mean.values, qX.variance.values
if self.cache_K: Xs = self.comp_K(Z, qX)
S_sq = np.sqrt(S)
dtheta_old = kern.gradient.copy()
dtheta = np.zeros_like(kern.gradient)
dZ = np.zeros_like(Z.values)
dmu = np.zeros_like(mu)
dS = np.zeros_like(S)
for i in xrange(self.degree):
if self.cache_K:
X = Xs[i]
else:
X = self.locs[i]*S_sq+mu
dL_dpsi0_i = dL_dpsi0*self.weights[i]
kern.update_gradients_diag(dL_dpsi0_i, X)
dtheta += kern.gradient
dX = kern.gradients_X_diag(dL_dpsi0_i, X)
Kfu = kern.K(X,Z)
dL_dkfu = (dL_dpsi1+ 2.*Kfu.dot(dL_dpsi2))*self.weights[i]
kern.update_gradients_full(dL_dkfu, X, Z)
dtheta += kern.gradient
dX += kern.gradients_X(dL_dkfu, X, Z)
dZ += kern.gradients_X(dL_dkfu.T, Z, X)
dmu += dX
dS += dX*self.locs[i]/(2.*S_sq)
kern.gradient[:] = dtheta_old
return dtheta, dZ, dmu, dS

View file

@ -55,16 +55,15 @@ def __psi2computations(variance, lengthscale, Z, mu, S):
# Produced intermediate results:
# _psi2 MxM
N,M,Q = mu.shape[0], Z.shape[0], mu.shape[1]
lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+(2*(mu*denom).dot(Z_hat.reshape(M*M,Q).T) - denom.dot(np.square(Z_hat).reshape(M*M,Q).T)).reshape(N,M,M)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
@ -157,5 +156,5 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1)
_psi1computations = Cacher(__psi1computations, limit=5)
_psi2computations = Cacher(__psi2computations, limit=5)

View file

@ -20,7 +20,6 @@ class RBF(Stationary):
_support_GPU = True
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
self.psicomp = PSICOMP_RBF()
if self.useGPU:
self.psicomp = PSICOMP_RBF_GPU()
else:
@ -36,6 +35,7 @@ class RBF(Stationary):
dc = super(RBF, self).__getstate__()
if self.useGPU:
dc['psicomp'] = PSICOMP_RBF()
dc['useGPU'] = False
return dc
def __setstate__(self, state):

52
GPy/kern/_src/spline.py Normal file
View file

@ -0,0 +1,52 @@
# Copyright (c) 2015, Thomas Hornung
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Spline(Kern):
"""
Linear spline kernel. You need to specify 2 parameters: the variance and c.
The variance is defined in powers of 10. Thus specifying -2 means 10^-2.
The parameter c allows to define the stiffness of the spline fit. A very stiff
spline equals linear regression.
See https://www.youtube.com/watch?v=50Vgw11qn0o starting at minute 1:17:28
Lit: Wahba, 1990
"""
def __init__(self, input_dim, variance=1., c=1., active_dims=None, name='spline'):
super(Spline, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.c = Param('c', c)
self.link_parameters(self.variance,self.c)
def K(self, X, X2=None):
if X2 is None: X2=X
term1 = (X+8.)*(X2.T+8.)/16.
term2 = abs((X-X2.T)/16.)**3
term3 = ((X+8.)/16.)**3 + ((X2.T+8.)/16.)**3
return (self.variance**2 * (1. + (1.+self.c) * term1 + self.c/3. * (term2 - term3)))
def Kdiag(self, X):
term1 = np.square(X+8.,X+8.)/16.
term3 = 2. * ((X+8.)/16.)**3
return (self.variance**2 * (1. + (1.+self.c) * term1 - self.c/3. * term3))[:,0]
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None: X2=X
term1 = (X+8.)*(X2.T+8.)/16.
term2 = abs((X-X2.T)/16.)**3
term3 = ((X+8.)/16.)**3 + ((X2.T+8.)/16.)**3
self.variance.gradient = np.sum(dL_dK * (2*self.variance * (1. + (1.+self.c) * term1 + self.c/3. * ( term2 - term3))))
self.c.gradient = np.sum(dL_dK * (self.variance**2* (term1 + 1./3.*(term2 - term3))))
def update_gradients_diag(self, dL_dKdiag, X):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError

View file

@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The standard periodic kernel which mentioned in:
[1] Gaussian Processes for Machine Learning, C. E. Rasmussen, C. K. I. Williams.
The MIT Press, 2005.
[2] Introduction to Gaussian processes. D. J. C. MacKay. In C. M. Bishop, editor,
Neural Networks and Machine Learning, pages 133-165. Springer, 1998.
"""
from .kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
class StdPeriodic(Kern):
"""
Standart periodic kernel
.. math::
k(x,y) = \theta_1 \exp \left[ - \frac{1}{2} {}\sum_{i=1}^{input\_dim}
\left( \frac{\sin(\frac{\pi}{\lambda_i} (x_i - y_i) )}{l_i} \right)^2 \right] }
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\theta_1` in the formula above
:type variance: float
:param wavelength: the vector of wavelengths :math:`\lambda_i`. If None then 1.0 is assumed.
:type wavelength: array or list of the appropriate size (or float if there is only one wavelength parameter)
:param lengthscale: the vector of lengthscale :math:`\l_i`. If None then 1.0 is assumed.
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD1: Auto Relevance Determination with respect to wavelength.
If equal to "False" one single wavelength parameter :math:`\lambda_i` for
each dimension is assumed, otherwise there is one lengthscale
parameter per dimension.
:type ARD1: Boolean
:param ARD2: Auto Relevance Determination with respect to lengthscale.
If equal to "False" one single wavelength parameter :math:`l_i` for
each dimension is assumed, otherwise there is one lengthscale
parameter per dimension.
:type ARD2: Boolean
:param active_dims: indices of dimensions which are used in the computation of the kernel
:type wavelength: array or list of the appropriate size
:param name: Name of the kernel for output
:type String
:param useGPU: whether of not use GPU
:type Boolean
"""
def __init__(self, input_dim, variance=1., wavelength=None, lengthscale=None, ARD1=False, ARD2=False, active_dims=None, name='std_periodic',useGPU=False):
super(StdPeriodic, self).__init__(input_dim, active_dims, name, useGPU=useGPU)
self.input_dim = input_dim
self.ARD1 = ARD1 # correspond to wavelengths
self.ARD2 = ARD2 # correspond to lengthscales
self.name = name
if self.ARD1 == False:
if wavelength is not None:
wavelength = np.asarray(wavelength)
assert wavelength.size == 1, "Only one wavelength needed for non-ARD kernel"
else:
wavelength = np.ones(1)
else:
if wavelength is not None:
wavelength = np.asarray(wavelength)
assert wavelength.size == input_dim, "bad number of wavelengths"
else:
wavelength = np.ones(input_dim)
if self.ARD2 == False:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
lengthscale = np.ones(1)
else:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(input_dim)
self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1, "Variance size must be one"
self.wavelengths = Param('wavelengths', wavelength, Logexp())
self.lengthscales = Param('lengthscales', lengthscale, Logexp())
self.link_parameters(self.variance, self.wavelengths, self.lengthscales)
def parameters_changed(self):
"""
This functions deals as a callback for each optimization iteration.
If one optimization step was successfull and the parameters
this callback function will be called to be able to update any
precomputations for the kernel.
"""
pass
def K(self, X, X2=None):
"""Compute the covariance matrix between X and X2."""
if X2 is None:
X2 = X
base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
exp_dist = np.exp( -0.5* np.sum( np.square( np.sin( base ) / self.lengthscales ), axis = -1 ) )
return self.variance * exp_dist
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
ret = np.empty(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None:
X2 = X
base = np.pi * (X[:, None, :] - X2[None, :, :]) / self.wavelengths
sin_base = np.sin( base )
exp_dist = np.exp( -0.5* np.sum( np.square( sin_base / self.lengthscales ), axis = -1 ) )
dwl = self.variance * (1.0/np.square(self.lengthscales)) * sin_base*np.cos(base) * (base / self.wavelengths)
dl = self.variance * np.square( sin_base) / np.power( self.lengthscales, 3)
self.variance.gradient = np.sum(exp_dist * dL_dK)
#target[0] += np.sum( exp_dist * dL_dK)
if self.ARD1: # different wavelengths
self.wavelengths.gradient = (dwl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
else: # same wavelengths
self.wavelengths.gradient = np.sum(dwl.sum(-1) * exp_dist * dL_dK)
if self.ARD2: # different lengthscales
self.lengthscales.gradient = (dl * exp_dist[:,:,None] * dL_dK[:, :, None]).sum(0).sum(0)
else: # same lengthscales
self.lengthscales.gradient = np.sum(dl.sum(-1) * exp_dist * dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
self.variance.gradient = np.sum(dL_dKdiag)
self.wavelengths.gradient = 0
self.lengthscales.gradient = 0
# def gradients_X(self, dL_dK, X, X2=None):
# """derivative of the covariance matrix with respect to X."""
#
# raise NotImplemented("Periodic kernel: dK_dX not implemented")
#
# def gradients_X_diag(self, dL_dKdiag, X):
#
# raise NotImplemented("Periodic kernel: dKdiag_dX not implemented")

View file

@ -13,7 +13,7 @@ from ...util.config import config # for assesing whether to use cython
from ...util.caching import Cache_this
try:
import stationary_cython
from . import stationary_cython
except ImportError:
print('warning in sationary: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false')
@ -77,7 +77,7 @@ class Stationary(Kern):
def dK_dr(self, r):
raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
@Cache_this(limit=5, ignore_args=())
@Cache_this(limit=20, ignore_args=())
def K(self, X, X2=None):
"""
Kernel function applied on inputs X and X2.
@ -89,7 +89,7 @@ class Stationary(Kern):
r = self._scaled_dist(X, X2)
return self.K_of_r(r)
@Cache_this(limit=3, ignore_args=())
@Cache_this(limit=20, ignore_args=())
def dK_dr_via_X(self, X, X2):
#a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2))
@ -114,7 +114,7 @@ class Stationary(Kern):
r2 = np.clip(r2, 0, np.inf)
return np.sqrt(r2)
@Cache_this(limit=5, ignore_args=())
@Cache_this(limit=20, ignore_args=())
def _scaled_dist(self, X, X2=None):
"""
Efficiently compute the scaled distance, r.

File diff suppressed because it is too large Load diff

View file

@ -1,15 +1,18 @@
#cython: boundscheck=False
#cython: nonecheck=False
#cython: wraparound=False
import numpy as np
cimport numpy as np
from cython.parallel import prange
cimport cython
ctypedef np.float64_t DTYPE_t
cdef extern from "stationary_utils.h":
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad) nogil
cdef extern from "stationary_utils.h":
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad) nogil
def grad_X(int N, int D, int M,
np.ndarray[DTYPE_t, ndim=2] _X,
@ -20,9 +23,20 @@ def grad_X(int N, int D, int M,
cdef double *X2 = <double*> _X2.data
cdef double *tmp = <double*> _tmp.data
cdef double *grad = <double*> _grad.data
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
with nogil:
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
def lengthscale_grads(int N, int M, int Q,
@cython.cdivision(True)
def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
cdef int n,d,nd,m
for nd in prange(N * D, nogil=True):
n = nd / D
d = nd % D
grad[n,d] = 0.0
for m in range(M):
grad[n,d] += tmp[n, m] * (X[n, d] - X2[m, d])
def lengthscale_grads_in_c(int N, int M, int Q,
np.ndarray[DTYPE_t, ndim=2] _tmp,
np.ndarray[DTYPE_t, ndim=2] _X,
np.ndarray[DTYPE_t, ndim=2] _X2,
@ -31,6 +45,16 @@ def lengthscale_grads(int N, int M, int Q,
cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data
cdef double *grad = <double*> _grad.data
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
with nogil:
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
cdef int q, n, m
cdef double gradq, dist
with nogil:
for q in range(Q):
grad[q] = 0.0
for n in range(N):
for m in range(M):
dist = X[n,q] - X2[m,q]
grad[q] += tmp[n, m] * dist * dist

View file

@ -1,19 +1,36 @@
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
int n,m,d;
double retnd;
//#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp[n*M+m]*(X[n*D+d]-X2[m*D+d]);
}
grad[n*D+d] = retnd;
int n,d,nd,m;
#pragma omp parallel for private(nd,n,d, retnd, m)
for(nd=0;nd<(D*N);nd++){
n = nd/D;
d = nd%D;
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp[n*M+m]*(X[nd]-X2[m*D+d]);
}
grad[nd] = retnd;
}
} //grad_X
void _lengthscale_grads_unsafe(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
int n,m,nm,q,nQ,mQ;
double dist;
#pragma omp parallel for private(n,m,nm,q,nQ,mQ,dist)
for(nm=0; nm<(N*M); nm++){
n = nm/M;
m = nm%M;
nQ = n*Q;
mQ = m*Q;
for(q=0; q<Q; q++){
dist = X[nQ+q]-X2[mQ+q];
grad[q] += tmp[nm]*dist*dist;
}
}
} //lengthscale_grads
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
int n,m,q;
double gradq, dist;
@ -33,3 +50,5 @@ for(q=0; q<Q; q++){

View file

@ -85,6 +85,7 @@ class Bernoulli(Likelihood):
gh_x, gh_w = gh_points
gh_w = gh_w / np.sqrt(np.pi)
shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
Ysign = np.where(Y==1,1,-1)
@ -232,6 +233,17 @@ class Bernoulli(Likelihood):
np.seterr(**state)
return d3logpdf_dlink3
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
"""
Get the "quantiles" of the binary labels (Bernoulli draws). all the
quantiles must be either 0 or 1, since those are the only values the
draw can take!
"""
p = self.predictive_mean(mu, var)
return [np.asarray(p>(q/100.), dtype=np.int32) for q in quantiles]
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -316,6 +316,9 @@ class Gaussian(Likelihood):
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if not isinstance(self.gp_link, link_functions.Identity):
return super(Gaussian, self).variational_expectations(Y=Y, m=m, v=v, gh_points=gh_points, Y_metadata=Y_metadata)
lik_var = float(self.variance)
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
dF_dmu = (Y - m)/lik_var

View file

@ -143,7 +143,7 @@ class Likelihood(Parameterized):
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
for yi, mi, vi, yi_m in zipped_values])
p_ystar = np.array(p_ystar).reshape(-1, 1)
p_ystar = np.array(p_ystar).reshape(*y_test.shape)
return np.log(p_ystar)
def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
@ -173,6 +173,7 @@ class Likelihood(Parameterized):
from scipy.misc import logsumexp
log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
log_p_ystar = np.array(log_p_ystar).reshape(*y_test.shape)
return log_p_ystar
@ -265,8 +266,8 @@ class Likelihood(Parameterized):
stop
if self.size:
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
dF_dtheta = np.dot(dF_dtheta, gh_w)
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None], Y_metadata=Y_metadata) # Ntheta x (orig size) x N_{quad_points}
dF_dtheta = np.dot(dF_dtheta, gh_w)/np.sqrt(np.pi)
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
else:
dF_dtheta = None # Not yet implemented
@ -297,13 +298,8 @@ class Likelihood(Parameterized):
return self.conditional_mean(f)*p
scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
return mean
def _conditional_mean(self, f):
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
raise NotImplementedError("implement this function to make predictions")
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
"""
Approximation to the predictive variance: V(Y_star)
@ -607,23 +603,30 @@ class Likelihood(Parameterized):
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
"""
pred_mean = self.predictive_mean(mu, var, Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
try:
pred_mean = self.predictive_mean(mu, var, Y_metadata=Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata=Y_metadata)
except NotImplementedError:
print("Finding predictive mean and variance via sampling rather than quadrature")
Nf_samp = 300
Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
pred_mean = np.mean(ss_y, axis=1)[:, None]
pred_var = np.var(ss_y, axis=1)[:, None]
return pred_mean, pred_var
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
#compute the quantiles by sampling!!!
N_samp = 500
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
#ss_y = self.samples(s, Y_metadata, samples=100)
ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
Nf_samp = 300
Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
#ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
return pred_quantiles
def samples(self, gp, Y_metadata=None, samples=1):
"""

View file

@ -137,7 +137,7 @@ class Poisson(Likelihood):
"""
return self.gp_link.transf(gp)
def samples(self, gp, Y_metadata=None):
def samples(self, gp, Y_metadata=None, samples=1):
"""
Returns a set of samples of observations based on a given value of the latent variable.
@ -145,5 +145,5 @@ class Poisson(Likelihood):
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.random.poisson(self.gp_link.transf(gp))
return Ysim.reshape(orig_shape)
Ysim = np.random.poisson(self.gp_link.transf(gp), [samples, gp.size]).T
return Ysim.reshape(orig_shape+(samples,))

View file

@ -64,9 +64,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
kernel.psicomp.GPU_direct = True
super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method,
normalizer=normalizer,

View file

@ -1,11 +1,11 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from ..models import GPLVM
from ..mappings import *
from . import GPLVM
from .. import mappings
class BCGPLVM(GPLVM):
@ -16,33 +16,31 @@ class BCGPLVM(GPLVM):
:type Y: np.ndarray
:param input_dim: latent dimensionality
:type input_dim: int
:param init: initialisation method for the latent space
:type init: 'PCA'|'random'
:param mapping: mapping for back constraint
:type mapping: GPy.core.Mapping object
"""
def __init__(self, Y, input_dim, init='PCA', X=None, kernel=None, normalize_Y=False, mapping=None):
def __init__(self, Y, input_dim, kernel=None, mapping=None):
if mapping is None:
mapping = Kernel(X=Y, output_dim=input_dim)
mapping = mappings.MLP(input_dim=Y.shape[1],
output_dim=input_dim,
hidden_dim=10)
else:
assert mapping.input_dim==Y.shape[1], "mapping input dim does not work for Y dimension"
assert mapping.output_dim==input_dim, "mapping output dim does not work for self.input_dim"
GPLVM.__init__(self, Y, input_dim, X=mapping.f(Y), kernel=kernel, name="bcgplvm")
self.unlink_parameter(self.X)
self.mapping = mapping
GPLVM.__init__(self, Y, input_dim, init, X, kernel, normalize_Y)
self.X = self.mapping.f(self.likelihood.Y)
self.link_parameter(self.mapping)
def _get_param_names(self):
return self.mapping._get_param_names() + GP._get_param_names(self)
self.X = self.mapping.f(self.Y)
def _get_params(self):
return np.hstack((self.mapping._get_params(), GP._get_params(self)))
def parameters_changed(self):
self.X = self.mapping.f(self.Y)
GP.parameters_changed(self)
Xgradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
self.mapping.update_gradients(Xgradient, self.Y)
def _set_params(self, x):
self.mapping._set_params(x[:self.mapping.num_params])
self.X = self.mapping.f(self.likelihood.Y)
GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self):
dL_df = self.kern.gradients_X(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -3,7 +3,7 @@
import numpy as np
from .. import kern
from bayesian_gplvm import BayesianGPLVM
from .bayesian_gplvm import BayesianGPLVM
from ..core.parameterization.variational import NormalPosterior, NormalPrior
class DPBayesianGPLVM(BayesianGPLVM):
@ -15,5 +15,5 @@ class DPBayesianGPLVM(BayesianGPLVM):
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm')
self.X.mean.set_prior(X_prior)
self.X.mean.set_prior(X_prior)
self.link_parameter(X_prior)

View file

@ -16,6 +16,7 @@ class GPRegression(GP):
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf
:param Norm normalizer: [False]
:param noise_var: the noise variance for Gaussian likelhood, defaults to 1.
Normalize Y with the norm given.
If normalizer is False, no normalization will be done
@ -25,12 +26,12 @@ class GPRegression(GP):
"""
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.):
if kernel is None:
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian()
likelihood = likelihoods.Gaussian(variance=noise_var)
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)

View file

@ -2,20 +2,18 @@
# Distributed under the terms of the GNU General public License, see LICENSE.txt
import numpy as np
from scipy import stats
from scipy.special import erf
from ..core.model import Model
from ..core import GP
from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv
from ..inference.latent_function_inference import VarGauss
log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model):
class GPVariationalGaussianApproximation(GP):
"""
The Variational Gaussian Approximation revisited implementation for regression
The Variational Gaussian Approximation revisited
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
@ -25,84 +23,15 @@ class GPVariationalGaussianApproximation(Model):
pages = {786--792},
}
"""
def __init__(self, X, Y, kernel=None):
Model.__init__(self,'Variational GP classification')
# accept the construction arguments
self.X = ObsAr(X)
if kernel is None:
kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
self.kern = kernel
self.link_parameter(self.kern)
self.num_data, self.input_dim = self.X.shape
def __init__(self, X, Y, kernel, likelihood, Y_metadata=None):
num_data = Y.shape[0]
self.alpha = Param('alpha', np.zeros((num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(num_data))
inf = VarGauss(self.alpha, self.beta)
super(GPVariationalGaussianApproximation, self).__init__(X, Y, kernel, likelihood, name='VarGP', inference_method=inf)
self.alpha = Param('alpha', np.zeros(self.num_data))
self.beta = Param('beta', np.ones(self.num_data))
self.link_parameter(self.alpha)
self.link_parameter(self.beta)
self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
self.Ysign = np.where(Y==1, 1, -1).flatten()
def log_likelihood(self):
"""
Marginal log likelihood evaluation
"""
return self._log_lik
def likelihood_quadrature(self, m, v):
"""
Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
"""
# assume probit for now.
X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
p = stats.norm.cdf(X)
N = stats.norm.pdf(X)
F = np.log(p).dot(self.gh_w)
NoverP = N/p
dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w)
dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
return F, dF_dm, dF_dv
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
KB = K*self.beta[:, None]
BKB = KB*self.beta[None, :]
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
var = np.diag(Sigma)
F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
self._log_lik = F.sum() - KL
self.alpha.gradient = dF_da - dKL_da
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
def predict(self, Xnew):
"""
Predict the function(s) at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
"""
Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
Kux = self.kern.K(self.X, Xnew)
mu = np.dot(Kux.T, self.alpha)
WiKux = np.dot(Wi, Kux)
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))

View file

@ -58,12 +58,15 @@ class GPLVM(GP):
return target
def plot(self):
assert self.likelihood.Y.shape[1] == 2
pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable
assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent"
from matplotlib import pyplot as plt
plt.scatter(self.Y[:, 0],
self.Y[:, 1],
40, self.X[:, 0].copy(),
linewidth=0, cmap=plt.cm.jet)
Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
mu, _ = self.predict(Xnew)
import pylab as pb
pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
plt.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,

View file

@ -228,14 +228,14 @@ class HessianChecker(GradientChecker):
if verbose:
if block_indices:
print "\nBlock {}".format(block_indices)
print("\nBlock {}".format(block_indices))
else:
print "\nAll blocks"
print("\nAll blocks")
header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
header_string = map(lambda x: ' | '.join(header), [header])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
print('\n'.join([header_string[0], separator]))
min_r = '%.6f' % float(numpy.min(ratio))
max_r = '%.6f' % float(numpy.max(ratio))
max_d = '%.6f' % float(numpy.max(difference))
@ -248,7 +248,7 @@ class HessianChecker(GradientChecker):
checked = "\033[91m False \033[0m"
grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
print grad_string
print(grad_string)
if plot:
import pylab as pb
@ -348,7 +348,7 @@ class SkewChecker(HessianChecker):
numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
numeric_hess = numeric_hess_partial(x)
print "Done making numerical hessian"
print("Done making numerical hessian")
if analytic_hess.dtype is np.dtype('object'):
#Blockify numeric_hess aswell
blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
@ -365,7 +365,7 @@ class SkewChecker(HessianChecker):
#Unless super_plot is set, just plot the first one
p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
if verbose:
print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
print("Checking derivative of hessian wrt parameter number {}".format(block_ind))
check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
current_index += current_size

View file

@ -63,33 +63,18 @@ class SparseGPMiniBatch(SparseGP):
if stochastic and missing_data:
self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPStochastics(self, batchsize)
elif stochastic and not missing_data:
self.missing_data = False
self.stochastics = SparseGPStochastics(self, batchsize)
elif missing_data:
self.missing_data = True
self.ninan = ~np.isnan(Y)
self.stochastics = SparseGPMissing(self)
else:
self.stochastics = False
logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0)
if self.missing_data:
self.Ylist = []
overall = self.Y_normalized.shape[1]
m_f = lambda i: "Precomputing Y for missing data: {: >7.2%}".format(float(i+1)/overall)
message = m_f(-1)
print(message, end=' ')
for d in range(overall):
self.Ylist.append(self.Y_normalized[self.ninan[:, d], d][:, None])
print(' '*(len(message)+1) + '\r', end=' ')
message = m_f(d)
print(message, end=' ')
print('')
self.posterior = None
def has_uncertain_inputs(self):
@ -245,8 +230,7 @@ class SparseGPMiniBatch(SparseGP):
message = m_f(-1)
print(message, end=' ')
for d in self.stochastics.d:
ninan = self.ninan[:, d]
for d, ninan in self.stochastics.d:
if not self.stochastics:
print(' '*(len(message)) + '\r', end=' ')
@ -257,7 +241,7 @@ class SparseGPMiniBatch(SparseGP):
grad_dict, current_values, value_indices = self._inner_parameters_changed(
self.kern, self.X[ninan],
self.Z, self.likelihood,
self.Ylist[d], self.Y_metadata,
self.Y_normalized[ninan][:, d], self.Y_metadata,
Lm, dL_dKmm,
subset_indices=dict(outputs=d, samples=ninan))
@ -266,8 +250,8 @@ class SparseGPMiniBatch(SparseGP):
Lm = posterior.K_chol
dL_dKmm = grad_dict['dL_dKmm']
woodbury_inv[:, :, d] = posterior.woodbury_inv
woodbury_vector[:, d:d+1] = posterior.woodbury_vector
woodbury_inv[:, :, d] = posterior.woodbury_inv[:,:,None]
woodbury_vector[:, d] = posterior.woodbury_vector
self._log_marginal_likelihood += log_marginal_likelihood
if not self.stochastics:
print('')

View file

@ -5,11 +5,95 @@ import numpy as np
from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import kern
from ..core.parameterization import Param
from ..likelihoods import Gaussian
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior,VariationalPrior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU
class IBPPosterior(SpikeAndSlabPosterior):
'''
The SpikeAndSlab distribution for variational approximations.
'''
def __init__(self, means, variances, binary_prob, tau=None, sharedX=False, name='latent space'):
"""
binary_prob : the probability of the distribution on the slab part.
"""
from ..core.parameterization.transformations import Logexp
super(IBPPosterior, self).__init__(means, variances, binary_prob, group_spike=True, name=name)
self.sharedX = sharedX
if sharedX:
self.mean.fix(warning=False)
self.variance.fix(warning=False)
self.tau = Param("tau_", np.ones((self.gamma_group.shape[0],2)), Logexp())
self.link_parameter(self.tau)
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient, self.tau.gradient = grad
def __getitem__(self, s):
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
import copy
n = self.__new__(self.__class__, self.name)
dc = self.__dict__.copy()
dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s]
dc['binary_prob'] = self.binary_prob[s]
dc['tau'] = self.tau
dc['parameters'] = copy.copy(self.parameters)
n.__dict__.update(dc)
n.parameters[dc['mean']._parent_index_] = dc['mean']
n.parameters[dc['variance']._parent_index_] = dc['variance']
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n.parameters[dc['tau']._parent_index_] = dc['tau']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size - self.tau.size
n.size = n.mean.size + n.variance.size + n.gamma.size+ n.tau.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(IBPPosterior, self).__getitem__(s)
class IBPPrior(VariationalPrior):
def __init__(self, input_dim, alpha =2., name='IBPPrior', **kw):
super(IBPPrior, self).__init__(name=name, **kw)
from ..core.parameterization.transformations import Logexp, __fixed__
self.input_dim = input_dim
self.variance = 1.
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def KL_divergence(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
ad = self.alpha/self.input_dim
from scipy.special import betaln,digamma
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(ad,1.)*self.input_dim \
-betaln(tau[:,0], tau[:,1]).sum() + ((tau[:,0]-gamma-ad)*digamma(tau[:,0])).sum() + \
((tau[:,1]+gamma-2.)*digamma(tau[:,1])).sum() + ((2.+ad-tau[:,0]-tau[:,1])*digamma(tau.sum(axis=1))).sum()
return part1+part2
def update_gradients_KL(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
variational_posterior.mean.gradient -= gamma*mu/self.variance
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2.
from scipy.special import digamma,polygamma
dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
ad = self.alpha/self.input_dim
common = (ad+2-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1))
variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common)
variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common)
class SSGPLVM(SparseGP_MPI):
"""
Spike-and-Slab Gaussian Process Latent Variable Model
@ -23,9 +107,11 @@ class SSGPLVM(SparseGP_MPI):
"""
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, **kwargs):
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
self.group_spike = group_spike
self.init = init
self.sharedX = sharedX
if X == None:
from ..util.initialization import initialize_latent
@ -33,8 +119,6 @@ class SSGPLVM(SparseGP_MPI):
else:
fracs = np.ones(input_dim)
self.init = init
if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape)
@ -64,17 +148,16 @@ class SSGPLVM(SparseGP_MPI):
if pi is None:
pi = np.empty((input_dim))
pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi) # the prior probability of the latent binary variable b
X = SpikeAndSlabPosterior(X, X_variance, gamma)
if IBP:
self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior
X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX)
else:
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior
X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX)
super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
# self.X.unfix()
# self.X.variance.constrain_positive()
self.link_parameter(self.X, index=0)
if self.group_spike:
[self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in range(self.X.gamma.shape[1])] # Tie columns together
def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form."""
@ -84,9 +167,15 @@ class SSGPLVM(SparseGP_MPI):
"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient
def _propogate_X_val(self):
pass
def parameters_changed(self):
self.X.propogate_val()
if self.sharedX: self._highest_parent_._propogate_X_val()
super(SSGPLVM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch):
self.X.collate_gradient()
return
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@ -95,6 +184,7 @@ class SSGPLVM(SparseGP_MPI):
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
self.X.collate_gradient()
def input_sensitivity(self):
if self.kern.ARD:

View file

@ -2,33 +2,256 @@
The Maniforld Relevance Determination model with the spike-and-slab prior
"""
import numpy as np
from ..core import Model
from .ss_gplvm import SSGPLVM
from ..core.parameterization.variational import SpikeAndSlabPrior,NormalPosterior,VariationalPrior
from ..util.misc import param_to_array
from ..kern import RBF
from ..core import Param
from numpy.linalg.linalg import LinAlgError
class SSMRD(Model):
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
initx = 'PCA', initz = 'permute',
num_inducing=10, Z=None, kernel=None,
inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute',
num_inducing=10, Zs=None, kernels=None, inference_methods=None, likelihoods=None, group_spike=True,
pi=0.5, name='ss_mrd', Ynames=None, mpi_comm=None, IBP=False, alpha=2., taus=None, ):
super(SSMRD, self).__init__(name)
self.mpi_comm = mpi_comm
self._PROPAGATE_ = False
self.updates = False
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx,
kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods,
name='model_'+str(i)) for i,y in enumerate(Ylist)]
self.add_parameters(*(self.models))
# initialize X for individual models
X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
self.X = NormalPosterior(means=X, variances=X_variance)
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])]
for i in range(self.models[0].X.mean.shape[0])]
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])]
for i in range(self.models[0].X.variance.shape[0])]
if kernels is None:
kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in xrange(len(Ylist))]
if Zs is None:
Zs = [None]* len(Ylist)
if likelihoods is None:
likelihoods = [None]* len(Ylist)
if inference_methods is None:
inference_methods = [None]* len(Ylist)
self.updates = True
if IBP:
self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in xrange(len(Ylist))]
else:
self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in xrange(len(Ylist))]
self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike,
kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i],
name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)]
self.link_parameters(*(self.models+[self.X]))
def _propogate_X_val(self):
if self._PROPAGATE_: return
for m in self.models:
m.X.mean.values[:] = self.X.mean.values
m.X.variance.values[:] = self.X.variance.values
varp_list = [m.X for m in self.models]
[vp._update_inernal(varp_list) for vp in self.var_priors]
self._PROPAGATE_=True
def _collate_X_gradient(self):
self._PROPAGATE_ = False
self.X.mean.gradient[:] = 0
self.X.variance.gradient[:] = 0
for m in self.models:
self.X.mean.gradient += m.X.mean.gradient
self.X.variance.gradient += m.X.variance.gradient
def parameters_changed(self):
super(SSMRD, self).parameters_changed()
super(SSMRD, self).parameters_changed()
[m.parameters_changed() for m in self.models]
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
self._collate_X_gradient()
def log_likelihood(self):
return self._log_marginal_likelihood
def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
# Divide latent dimensions
idx = np.empty((input_dim,),dtype=np.int)
residue = (input_dim)%(len(Ylist))
for i in xrange(len(Ylist)):
if i < residue:
size = input_dim/len(Ylist)+1
idx[i*size:(i+1)*size] = i
else:
size = input_dim/len(Ylist)
idx[i*size+residue:(i+1)*size+residue] = i
if X is None:
if initx == 'PCA_concat':
X = np.empty((Ylist[0].shape[0],input_dim))
fracs = np.empty((input_dim,))
from ..util.initialization import initialize_latent
for i in xrange(len(Ylist)):
Y = Ylist[i]
dim = (idx==i).sum()
if dim>0:
x, fr = initialize_latent('PCA', dim, Y)
X[:,idx==i] = x
fracs[idx==i] = fr
elif initx=='PCA_joint':
y = np.hstack(Ylist)
from ..util.initialization import initialize_latent
X, fracs = initialize_latent('PCA', input_dim, y)
else:
X = np.random.randn(Ylist[0].shape[0], input_dim)
fracs = np.ones(input_dim)
else:
fracs = np.ones(input_dim)
if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape)
if Gammas is None:
Gammas = []
for x in X:
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
Gammas.append(gamma)
return X, X_variance, Gammas, fracs
@Model.optimizer_array.setter
def optimizer_array(self, p):
if self.mpi_comm != None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)
Model.optimizer_array.fset(self,p)
def optimize(self, optimizer=None, start=None, **kwargs):
self._IN_OPTIMIZATION_ = True
if self.mpi_comm==None:
super(SSMRD, self).optimize(optimizer,start,**kwargs)
elif self.mpi_comm.rank==0:
super(SSMRD, self).optimize(optimizer,start,**kwargs)
self.mpi_comm.Bcast(np.int32(-1),root=0)
elif self.mpi_comm.rank>0:
x = self.optimizer_array.copy()
flag = np.empty(1,dtype=np.int32)
while True:
self.mpi_comm.Bcast(flag,root=0)
if flag==1:
try:
self.optimizer_array = x
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures:
raise
self._fail_count += 1
elif flag==-1:
break
else:
self._IN_OPTIMIZATION_ = False
raise Exception("Unrecognizable flag for synchronization!")
self._IN_OPTIMIZATION_ = False
class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
def __init__(self, nModels, pi=0.5, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
self.nModels = nModels
self._b_prob_all = 0.5
super(SpikeAndSlabPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
def _update_inernal(self, varp_list):
"""Make an update of the internal status by gathering the variational posteriors for all the individual models."""
# The probability for the binary variable for the same latent dimension of any of the models is on.
if self.group_spike:
self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
[np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
else:
self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
[np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
if self.group_spike:
gamma = variational_posterior.binary_prob[0]
else:
gamma = variational_posterior.binary_prob
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
N = variational_posterior.num_data
if self.group_spike:
gamma = variational_posterior.binary_prob.values[0]
else:
gamma = variational_posterior.binary_prob.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
if self.group_spike:
tmp = self._b_prob_all/(1.-gamma)
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
else:
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
if self.learnPi:
raise 'Not Supported!'
class IBPPrior_SSMRD(VariationalPrior):
def __init__(self, nModels, input_dim, alpha =2., tau=None, name='IBPPrior', **kw):
super(IBPPrior_SSMRD, self).__init__(name=name, **kw)
from ..core.parameterization.transformations import Logexp, __fixed__
self.nModels = nModels
self._b_prob_all = 0.5
self.input_dim = input_dim
self.variance = 1.
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def _update_inernal(self, varp_list):
"""Make an update of the internal status by gathering the variational posteriors for all the individual models."""
# The probability for the binary variable for the same latent dimension of any of the models is on.
self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
[np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
def KL_divergence(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
part1 = ((1.-self._b_prob_all)* (np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
ad = self.alpha/self.input_dim
from scipy.special import betaln,digamma
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + (betaln(ad,1.)*self.input_dim -betaln(tau[:,0], tau[:,1]).sum())/self.nModels \
+ (( (tau[:,0]-ad)/self.nModels -gamma)*digamma(tau[:,0])).sum() + \
(((tau[:,1]-1.)/self.nModels+gamma-1.)*digamma(tau[:,1])).sum() + (((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*digamma(tau.sum(axis=1))).sum()
return part1+part2
def update_gradients_KL(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
variational_posterior.mean.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
from scipy.special import digamma,polygamma
tmp = self._b_prob_all/(1.-gamma)
dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
variational_posterior.binary_prob.gradient -= dgamma+tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
ad = self.alpha/self.input_dim
common = ((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*polygamma(1,tau.sum(axis=1))
variational_posterior.tau.gradient[:,0] = -(((tau[:,0]-ad)/self.nModels -gamma)*polygamma(1,tau[:,0])+common)
variational_posterior.tau.gradient[:,1] = -(((tau[:,1]-1.)/self.nModels+gamma-1.)*polygamma(1,tau[:,1])+common)

View file

@ -3,7 +3,7 @@
try:
import Tango
#import Tango
import pylab as pb
except:
pass
@ -17,11 +17,11 @@ def ax_default(fignum, ax):
fig = ax.figure
return fig, ax
def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw):
def meanplot(x, mu, color='#3300FF', ax=None, fignum=None, linewidth=2,**kw):
_, axes = ax_default(fignum, ax)
return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
def gpplot(x, mu, lower, upper, edgecol=Tango.colorsHex['darkBlue'], fillcol=Tango.colorsHex['lightBlue'], ax=None, fignum=None, **kwargs):
def gpplot(x, mu, lower, upper, edgecol='#3300FF', fillcol='#33CCFF', ax=None, fignum=None, **kwargs):
_, axes = ax_default(fignum, ax)
mu = mu.flatten()

View file

@ -1,16 +1,14 @@
# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import Tango
import pylab as pb
except:
pass
import numpy as np
from . import Tango
from base_plots import gpplot, x_frame1D, x_frame2D
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse
from ...core.parameterization.variational import VariationalPosterior
from matplotlib import pyplot as plt
def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
@ -63,7 +61,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#if len(which_data_ycols)==0:
#raise ValueError('No data selected for plotting')
if ax is None:
fig = pb.figure(num=fignum)
fig = plt.figure(num=fignum)
ax = fig.add_subplot(111)
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
@ -78,7 +76,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if predict_kw is None:
predict_kw = {}
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
@ -106,11 +104,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
upper = m + 2*np.sqrt(v)
else:
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta, **predict_kw)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=meta)
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
fmu, fv = model._raw_predict(Xgrid, full_cov=False, **predict_kw)
lower, upper = model.likelihood.predictive_quantiles(fmu, fv, (2.5, 97.5), Y_metadata=Y_metadata)
for d in which_data_ycols:
@ -119,9 +120,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples)
Ysim = model.posterior_samples(Xgrid, samples, Y_metadata=Y_metadata)
print Ysim.shape
print Xnew.shape
for yi in Ysim.T:
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], '#3300FF', linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
if samples_f: #NOTE not tested with fixed_inputs
@ -184,14 +187,16 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
m, _ = model._raw_predict(Xgrid, **predict_kw)
else:
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta, **predict_kw)
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=plt.cm.jet)
if not plot_raw: plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=plt.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0])
@ -219,7 +224,7 @@ def plot_fit_f(model, *args, **kwargs):
kwargs['plot_raw'] = True
plot_fit(model,*args, **kwargs)
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True):
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_all=False):
"""
Convenience function for returning back fixed_inputs where the other inputs
are fixed using fix_routine
@ -235,8 +240,13 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True):
f_inputs = []
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean.values.copy()
else:
elif isinstance(model.X, VariationalPosterior):
X = model.X.values.copy()
else:
if X_all:
X = model.X_all.copy()
else:
X = model.X.copy()
for i in range(X.shape[1]):
if i not in non_fixed_inputs:
if fix_routine == 'mean':

View file

@ -22,7 +22,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)):
lines = []
fills = []
bg_lines = []
means, variances = parameterized.mean, parameterized.variance
means, variances = parameterized.mean.values, parameterized.variance.values
x = np.arange(means.shape[0])
for i in range(means.shape[1]):
if ax is None:
@ -43,7 +43,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None, figsize=(12, 6)):
if i < means.shape[1] - 1:
a.set_xticklabels('')
pb.draw()
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
a.figure.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return dict(lines=lines, fills=fills, bg_lines=bg_lines)
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_side=True):

View file

@ -408,12 +408,13 @@ class mocap_data_show_vpython(vpython_show):
class mocap_data_show(matplotlib_show):
"""Base class for visualizing motion capture data."""
def __init__(self, vals, axes=None, connect=None):
def __init__(self, vals, axes=None, connect=None, color='b'):
if axes==None:
fig = plt.figure()
axes = fig.add_subplot(111, projection='3d', aspect='equal')
matplotlib_show.__init__(self, vals, axes)
self.color = color
self.connect = connect
self.process_values()
self.initialize_axes()
@ -423,7 +424,7 @@ class mocap_data_show(matplotlib_show):
self.axes.figure.canvas.draw()
def draw_vertices(self):
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2])
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2], color=self.color)
def draw_edges(self):
self.line_handle = []
@ -442,7 +443,7 @@ class mocap_data_show(matplotlib_show):
z.append(self.vals[i, 2])
z.append(self.vals[j, 2])
z.append(np.NaN)
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-')
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), '-', color=self.color)
def modify(self, vals):
self.vals = vals.copy()
@ -450,7 +451,7 @@ class mocap_data_show(matplotlib_show):
self.initialize_axes_modify()
self.draw_vertices()
self.initialize_axes()
self.finalize_axes_modify()
#self.finalize_axes_modify()
self.draw_edges()
self.axes.figure.canvas.draw()
@ -469,12 +470,20 @@ class mocap_data_show(matplotlib_show):
self.line_handle[0].remove()
def finalize_axes(self):
self.axes.set_xlim(self.x_lim)
self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim)
self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
# self.axes.set_xlim(self.x_lim)
# self.axes.set_ylim(self.y_lim)
# self.axes.set_zlim(self.z_lim)
# self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
# self.axes.set_aspect('equal')
extents = np.array([getattr(self.axes, 'get_{}lim'.format(dim))() for dim in 'xyz'])
sz = extents[:,1] - extents[:,0]
centers = np.mean(extents, axis=1)
maxsize = max(abs(sz))
r = maxsize/2
for ctr, dim in zip(centers, 'xyz'):
getattr(self.axes, 'set_{}lim'.format(dim))(ctr - r, ctr + r)
# self.axes.set_aspect('equal')
# self.axes.autoscale(enable=False)
def finalize_axes_modify(self):
@ -494,7 +503,7 @@ class stick_show(mocap_data_show):
class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles."""
def __init__(self, vals, skel, axes=None, padding=0):
def __init__(self, vals, skel, axes=None, padding=0, color='b'):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles.
:param vals: set of modeled angles to use for printing in the axis when it's first created.
:type vals: np.array
@ -506,7 +515,7 @@ class skeleton_show(mocap_data_show):
self.skel = skel
self.padding = padding
connect = skel.connection_matrix()
mocap_data_show.__init__(self, vals, axes=axes, connect=connect)
mocap_data_show.__init__(self, vals, axes=axes, connect=connect, color=color)
def process_values(self):
"""Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting.

View file

@ -9,8 +9,8 @@ These tests make sure that the opure python and cython codes work the same
class CythonTestChols(np.testing.TestCase):
def setUp(self):
self.flat = np.random.randn(45, 5)
self.triang = np.dstack([np.eye(20)[:,:,None] for i in range(3)])
self.flat = np.random.randn(45,5)
self.triang = np.array([np.eye(20) for i in range(3)])
def test_flat_to_triang(self):
L1 = choleskies._flat_to_triang_pure(self.flat)
L2 = choleskies._flat_to_triang_cython(self.flat)
@ -51,11 +51,16 @@ class test_stationary(np.testing.TestCase):
class test_choleskies_backprop(np.testing.TestCase):
def setUp(self):
self.dL, self.L = np.random.randn(2, 100, 100)
a =np.random.randn(10,12)
A = a.dot(a.T)
self.L = GPy.util.linalg.jitchol(A)
self.dL = np.random.randn(10,10)
def test(self):
r1 = GPy.util.choleskies._backprop_gradient_pure(self.dL, self.L)
r2 = GPy.util.choleskies.choleskies_cython.backprop_gradient(self.dL, self.L)
r3 = GPy.util.choleskies.choleskies_cython.backprop_gradient_par_c(self.dL, self.L)
np.testing.assert_allclose(r1, r2)
np.testing.assert_allclose(r1, r3)

View file

@ -8,11 +8,12 @@ The test cases for various inference algorithms
import unittest, itertools
import numpy as np
import GPy
#np.seterr(invalid='raise')
class InferenceXTestCase(unittest.TestCase):
def genData(self):
np.random.seed(1)
D1,D2,N = 12,12,50
x = np.linspace(0, 4 * np.pi, N)[:, None]

View file

@ -312,7 +312,12 @@ class KernelGradientTestsContinuous(unittest.TestCase):
k = GPy.kern.LinearFull(self.D, self.D-1)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_standard_periodic(self):
k = GPy.kern.StdPeriodic(self.D, self.D-1)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
class KernelTestsMiscellaneous(unittest.TestCase):
def setUp(self):
N, D = 100, 10

View file

@ -9,8 +9,7 @@ import inspect
from GPy.likelihoods import link_functions
from GPy.core.parameterization import Param
from functools import partial
#np.random.seed(300)
#np.random.seed(4)
fixed_seed = 7
#np.seterr(divide='raise')
def dparam_partial(inst_func, *args):
@ -105,6 +104,7 @@ class TestNoiseModels(object):
Generic model checker
"""
def setUp(self):
np.random.seed(fixed_seed)
self.N = 15
self.D = 3
self.X = np.random.rand(self.N, self.D)*10
@ -218,7 +218,8 @@ class TestNoiseModels(object):
"constraints": [(".*variance", self.constrain_positive)]
},
"laplace": True,
"ep": False # FIXME: Should be True when we have it working again
"ep": False, # FIXME: Should be True when we have it working again
"variational_expectations": True,
},
"Gaussian_log": {
"model": GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var),
@ -227,7 +228,8 @@ class TestNoiseModels(object):
"vals": [self.var],
"constraints": [(".*variance", self.constrain_positive)]
},
"laplace": True
"laplace": True,
"variational_expectations": True
},
#"Gaussian_probit": {
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
@ -252,7 +254,8 @@ class TestNoiseModels(object):
"link_f_constraints": [partial(self.constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
"ep": False # FIXME: Should be True when we have it working again
"ep": False, # FIXME: Should be True when we have it working again
"variational_expectations": True
},
"Exponential_default": {
"model": GPy.likelihoods.Exponential(),
@ -347,6 +350,10 @@ class TestNoiseModels(object):
ep = attributes["ep"]
else:
ep = False
if "variational_expectations" in attributes:
var_exp = attributes["variational_expectations"]
else:
var_exp = False
#if len(param_vals) > 1:
#raise NotImplementedError("Cannot support multiple params in likelihood yet!")
@ -377,6 +384,11 @@ class TestNoiseModels(object):
if ep:
#ep likelihood gradcheck
yield self.t_ep_fit_rbf_white, model, self.X, Y, f, Y_metadata, self.step, param_vals, param_names, param_constraints
if var_exp:
#Need to specify mu and var!
yield self.t_varexp, model, Y, Y_metadata
yield self.t_dexp_dmu, model, Y, Y_metadata
yield self.t_dexp_dvar, model, Y, Y_metadata
self.tearDown()
@ -603,6 +615,87 @@ class TestNoiseModels(object):
print(m)
assert m.checkgrad(verbose=1, step=step)
################
# variational expectations #
################
@with_setup(setUp, tearDown)
def t_varexp(self, model, Y, Y_metadata):
#Test that the analytic implementation (if it exists) matches the generic gauss
#hermite implementation
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = model.variational_expectations(Y=Y, m=mu, v=var, gh_points=None, Y_metadata=Y_metadata)[0]
#Implementation of gauss hermite integration
shape = mu.shape
gh_x, gh_w= np.polynomial.hermite.hermgauss(50)
m,v,Y = mu.flatten(), var.flatten(), Y.flatten()
#make a grid of points
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + m[:,None]
#evaluate the likelhood for the grid. First ax indexes the data (and mu, var) and the second indexes the grid.
# broadcast needs to be handled carefully.
logp = model.logpdf(X, Y[:,None], Y_metadata=Y_metadata)
#average over the gird to get derivatives of the Gaussian's parameters
#division by pi comes from fact that for each quadrature we need to scale by 1/sqrt(pi)
expectation_gh = np.dot(logp, gh_w)/np.sqrt(np.pi)
expectation_gh = expectation_gh.reshape(*shape)
np.testing.assert_almost_equal(expectation, expectation_gh, decimal=5)
@with_setup(setUp, tearDown)
def t_dexp_dmu(self, model, Y, Y_metadata):
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = functools.partial(model.variational_expectations, Y=Y, v=var, gh_points=None, Y_metadata=Y_metadata)
#Function to get the nth returned value
def F(mu):
return expectation(m=mu)[0]
def dmu(mu):
return expectation(m=mu)[1]
grad = GradientChecker(F, dmu, mu.copy(), 'm')
grad.randomize()
print(grad)
print(model)
assert grad.checkgrad(verbose=1)
@with_setup(setUp, tearDown)
def t_dexp_dvar(self, model, Y, Y_metadata):
print("\n{}".format(inspect.stack()[0][3]))
#Make mu and var (marginal means and variances of q(f)) draws from a GP
k = GPy.kern.RBF(1).K(np.linspace(0,1,Y.shape[0])[:, None])
L = GPy.util.linalg.jitchol(k)
mu = L.dot(np.random.randn(*Y.shape))
#Variance must be positive
var = np.abs(L.dot(np.random.randn(*Y.shape))) + 0.01
expectation = functools.partial(model.variational_expectations, Y=Y, m=mu, gh_points=None, Y_metadata=Y_metadata)
#Function to get the nth returned value
def F(var):
return expectation(v=var)[0]
def dvar(var):
return expectation(v=var)[2]
grad = GradientChecker(F, dvar, var.copy(), 'v')
self.constrain_positive('v', grad)
#grad.randomize()
print(grad)
print(model)
assert grad.checkgrad(verbose=1)
class LaplaceTests(unittest.TestCase):
"""
@ -610,6 +703,7 @@ class LaplaceTests(unittest.TestCase):
"""
def setUp(self):
np.random.seed(fixed_seed)
self.N = 15
self.D = 1
self.X = np.random.rand(self.N, self.D)*10

View file

@ -49,7 +49,7 @@ class LinkFunctionTests(np.testing.TestCase):
self.assertTrue(grad3.checkgrad(verbose=True))
if test_lim:
print "Testing limits"
print("Testing limits")
#Remove some otherwise we are too close to the limit for gradcheck to work effectively
lim_of_inf = lim_of_inf - 1e-4
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)
@ -79,8 +79,7 @@ class LinkFunctionTests(np.testing.TestCase):
assert np.isinf(np.exp(np.log(self.f_upper_lim)))
#Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
self.assertTrue(np.isfinite(link.transf(self.f_upper_lim)))
self.check_overflow(link, lim_of_inf)
#Check that it would otherwise fail

View file

@ -1,6 +1,7 @@
import numpy as np
import scipy as sp
import GPy
import warnings
class MiscTests(np.testing.TestCase):
"""
@ -11,8 +12,15 @@ class MiscTests(np.testing.TestCase):
self._lim_val_exp = np.log(self._lim_val)
def test_safe_exp_upper(self):
assert np.exp(self._lim_val_exp + 1) == np.inf
assert GPy.util.misc.safe_exp(self._lim_val_exp + 1) < np.inf
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always') # always print
assert np.isfinite(np.exp(self._lim_val_exp))
assert np.isinf(np.exp(self._lim_val_exp + 1))
assert np.isfinite(GPy.util.misc.safe_exp(self._lim_val_exp + 1))
print w
print len(w)
assert len(w)==1 # should have one overflow warning
def test_safe_exp_lower(self):
assert GPy.util.misc.safe_exp(1e-10) < np.inf

View file

@ -352,8 +352,8 @@ class GradientTests(np.testing.TestCase):
self.check_model(rbf, model_type='SparseGPRegression', dimension=2)
def test_SparseGPRegression_rbf_linear_white_kern_1D(self):
''' Testing the sparse GP regression with rbf kernel on 2d data '''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
''' Testing the sparse GP regression with rbf kernel on 1d data '''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1) + GPy.kern.White(1, 1e-5)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=1)
def test_SparseGPRegression_rbf_linear_white_kern_2D(self):
@ -361,14 +361,12 @@ class GradientTests(np.testing.TestCase):
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)
# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
@ -385,6 +383,16 @@ class GradientTests(np.testing.TestCase):
m = GPy.models.GPLVM(Y, input_dim, kernel=k)
self.assertTrue(m.checkgrad())
def test_BCGPLVM_rbf_bias_white_kern_2D(self):
""" Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.Bias(input_dim, 0.1) + GPy.kern.White(input_dim, 0.05)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T
m = GPy.models.BCGPLVM(Y, input_dim, kernel=k)
self.assertTrue(m.checkgrad())
def test_GPLVM_rbf_linear_white_kern_2D(self):
""" Testing GPLVM with rbf + bias kernel """
N, input_dim, D = 50, 1, 2
@ -410,23 +418,8 @@ class GradientTests(np.testing.TestCase):
Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.RBF(1)
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z)
# distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
# likelihood = GPy.likelihoods.EP(Y, distribution)
# m = GPy.core.SparseGP(X, likelihood, kernel, Z)
# m.ensure_default_constraints()
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_generalized_FITC(self):
N = 20
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.RBF(1) + GPy.kern.White(1)
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
m = GPy.models.FITCClassification(X, Y, kernel=k)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_multioutput_regression_1D(self):
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
@ -436,12 +429,11 @@ class GradientTests(np.testing.TestCase):
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.RBF(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
import ipdb;ipdb.set_trace()
m.constrain_fixed('.*rbf_var', 1.)
m = GPy.models.GPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel=k1)
#import ipdb;ipdb.set_trace()
#m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_multioutput_sparse_regression_1D(self):
X1 = np.random.rand(500, 1) * 8
X2 = np.random.rand(300, 1) * 5
@ -451,8 +443,7 @@ class GradientTests(np.testing.TestCase):
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.RBF(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel=k1)
self.assertTrue(m.checkgrad())
def test_gp_heteroscedastic_regression(self):
@ -481,6 +472,7 @@ class GradientTests(np.testing.TestCase):
self.assertTrue(m.checkgrad())
def test_gp_kronecker_gaussian(self):
np.random.seed(0)
N1, N2 = 30, 20
X1 = np.random.randn(N1, 1)
X2 = np.random.randn(N2, 1)
@ -501,16 +493,16 @@ class GradientTests(np.testing.TestCase):
m.randomize()
mm[:] = m[:]
assert np.allclose(m.log_likelihood(), mm.log_likelihood())
assert np.allclose(m.gradient, mm.gradient)
self.assertTrue(np.allclose(m.log_likelihood(), mm.log_likelihood()))
self.assertTrue(np.allclose(m.gradient, mm.gradient))
X1test = np.random.randn(100, 1)
X2test = np.random.randn(100, 1)
mean1, var1 = m.predict(X1test, X2test)
yy, xx = np.meshgrid(X2test, X1test)
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
mean2, var2 = mm.predict(Xgrid)
assert np.allclose(mean1, mean2)
assert np.allclose(var1, var2)
self.assertTrue( np.allclose(mean1, mean2) )
self.assertTrue( np.allclose(var1, var2) )
def test_gp_VGPC(self):
num_obs = 25
@ -518,7 +510,8 @@ class GradientTests(np.testing.TestCase):
X = X[:, None]
Y = 25. + np.sin(X / 20.) * 2. + np.random.rand(num_obs)[:, None]
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kern)
lik = GPy.likelihoods.Gaussian()
m = GPy.models.GPVariationalGaussianApproximation(X, Y, kernel=kern, likelihood=lik)
self.assertTrue(m.checkgrad())

View file

@ -100,10 +100,10 @@ def block_dot(A, B, diagonal=False):
Dshape = D.shape
if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
print "Broadcasting, C: {} D:{}".format(C.shape, D.shape)
print("Broadcasting, C: {} D:{}".format(C.shape, D.shape))
return C*D
else:
print "Dotting, C: {} C:{}".format(C.shape, D.shape)
print("Dotting, C: {} C:{}".format(C.shape, D.shape))
return np.dot(C,D)
dot = np.vectorize(f, otypes = [np.object])
return dot(A,B)

View file

@ -5,7 +5,7 @@ import numpy as np
from . import linalg
from .config import config
import choleskies_cython
from . import choleskies_cython
def safe_root(N):
i = np.sqrt(N)
@ -17,12 +17,12 @@ def safe_root(N):
def _flat_to_triang_pure(flat_mat):
N, D = flat_mat.shape
M = (-1 + safe_root(8*N+1))//2
ret = np.zeros((M, M, D))
count = 0
for m in range(M):
for mm in range(m+1):
for d in range(D):
ret.flat[d + m*D*M + mm*D] = flat_mat.flat[count];
ret = np.zeros((D, M, M))
for d in range(D):
count = 0
for m in range(M):
for mm in range(m+1):
ret[d,m, mm] = flat_mat[count, d];
count = count+1
return ret
@ -33,15 +33,15 @@ def _flat_to_triang_cython(flat_mat):
def _triang_to_flat_pure(L):
M, _, D = L.shape
D, _, M = L.shape
N = M*(M+1)//2
flat = np.empty((N, D))
count = 0;
for m in range(M):
for mm in range(m+1):
for d in range(D):
flat.flat[count] = L.flat[d + m*D*M + mm*D];
for d in range(D):
count = 0;
for m in range(M):
for mm in range(m+1):
flat[count,d] = L[d, m, mm]
count = count +1
return flat
@ -59,12 +59,12 @@ def _backprop_gradient_pure(dL, L):
"""
dL_dK = np.tril(dL).copy()
N = L.shape[0]
for k in xrange(N - 1, -1, -1):
for j in xrange(k + 1, N):
for i in xrange(j, N):
for k in range(N - 1, -1, -1):
for j in range(k + 1, N):
for i in range(j, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
for j in xrange(k + 1, N):
for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2 * L[k, k])
@ -74,7 +74,7 @@ def triang_to_cov(L):
return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])])
def multiple_dpotri(Ls):
return np.dstack([linalg.dpotri(np.asfortranarray(Ls[:,:,i]), lower=1)[0] for i in range(Ls.shape[-1])])
return np.array([linalg.dpotri(np.asfortranarray(Ls[i]), lower=1)[0] for i in range(Ls.shape[0])])
def indexes_to_fix_for_low_rank(rank, size):
"""
@ -100,7 +100,7 @@ def indexes_to_fix_for_low_rank(rank, size):
if config.getboolean('cython', 'working'):
triang_to_flat = _triang_to_flat_cython
flat_to_triang = _flat_to_triang_cython
backprop_gradient = choleskies_cython.backprop_gradient
backprop_gradient = choleskies_cython.backprop_gradient_par_c
else:
backprop_gradient = _backprop_gradient_pure
triang_to_flat = _triang_to_flat_pure

File diff suppressed because it is too large Load diff

View file

@ -5,55 +5,110 @@
# Copyright James Hensman and Alan Saul 2015
import numpy as np
from cython.parallel import prange, parallel
cimport numpy as np
cimport scipy.linalg.cython_blas as cblas
def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
"""take a matrix N x D and return a M X M x D array where
def flat_to_triang(double[:, :] flat, int M):
"""take a matrix N x D and return a D X M x M array where
N = M(M+1)/2
the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat.
"""
cdef int N = flat.shape[0]
cdef int D = flat.shape[1]
cdef int N = flat.shape[0]
cdef int count = 0
cdef np.ndarray[double, ndim=3] ret = np.zeros((M, M, D))
cdef double[:, :, ::1] ret = np.zeros((D, M, M))
cdef int d, m, mm
for d in range(D):
count = 0
for m in range(M):
for mm in range(m+1):
ret[m, mm, d] = flat[count,d]
count += 1
with nogil:
for d in range(D):
count = 0
for m in range(M):
for mm in range(m+1):
ret[d, m, mm] = flat[count,d]
count += 1
return ret
def triang_to_flat(np.ndarray[double, ndim=3] L):
cdef int M = L.shape[0]
cdef int D = L.shape[2]
def triang_to_flat(double[:, :, :] L):
cdef int D = L.shape[0]
cdef int M = L.shape[1]
cdef int N = M*(M+1)/2
cdef int count = 0
cdef np.ndarray[double, ndim=2] flat = np.empty((N, D))
cdef double[:, ::1] flat = np.empty((N, D))
cdef int d, m, mm
for d in range(D):
count = 0
for m in range(M):
for mm in range(m+1):
flat[count,d] = L[m, mm, d]
count += 1
with nogil:
for d in range(D):
count = 0
for m in range(M):
for mm in range(m+1):
flat[count,d] = L[d, m, mm]
count += 1
return flat
def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL).copy()
def backprop_gradient(double[:, :] dL, double[:, :] L):
cdef double[:, ::1] dL_dK = np.tril(dL)
cdef int N = L.shape[0]
cdef int k, j, i
for k in range(N - 1, -1, -1):
for j in range(k + 1, N):
for i in range(j, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
with nogil:
for k in range(N - 1, -1, -1):
for j in range(k + 1, N):
for i in range(j, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
return dL_dK
def backprop_gradient_par(double[:,:] dL, double[:,:] L):
cdef double[:,::1] dL_dK = np.tril(dL)
cdef int N = L.shape[0]
cdef int k, j, i
with nogil:
for k in range(N - 1, -1, -1):
with parallel():
for i in prange(k + 1, N):
for j in range(k+1, i+1):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
for j in range(i, N):
dL_dK[i, k] -= dL_dK[j, i] * L[j, k]
for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
return dL_dK
cdef void chol_backprop(int N, double[:, ::1] dL, double[:, ::1] L) nogil:
cdef int i, k, n
# DSYMV required constant arguments
cdef double alpha=-1, beta=1
cdef int incx=N
# DSCAL required arguments
cdef double scale
dL[N - 1, N - 1] /= (2. * L[N - 1, N - 1])
for k in range(N-2, -1, -1):
n = N-k-1
cblas.dsymv(uplo='u', n=&n, alpha=&alpha, a=&dL[k + 1, k + 1], lda=&N, x=&L[k + 1, k], incx=&incx,
beta=&beta, y=&dL[k + 1, k], incy=&N)
for i in xrange(0, N - k - 1):
dL[k + 1 + i, k] -= dL[k + i+ 1, k + i + 1] * L[k + 1 + i, k]
scale = 1.0 / L[k, k]
cblas.dscal(&n, &scale , &dL[k + 1, k], &N)
#
dL[k, k] -= cblas.ddot(&n, &dL[k + 1, k], &N, &L[k+1, k], &incx)
dL[k, k] /= (2.0 * L[k, k])
def backprop_gradient_par_c(double[:, :] dL, double[:, :] L):
cdef double[:, ::1] dL_dK = np.tril(dL) # makes a copy, c-contig
cdef double[:, ::1] L_cont = np.ascontiguousarray(L)
cdef int N = L.shape[0]
with nogil:
chol_backprop(N, dL_dK, L_cont)
return np.asarray(dL_dK)

View file

@ -15,7 +15,7 @@ import warnings
import os
from .config import config
import logging
import linalg_cython
from . import linalg_cython
_scipyversion = np.float64((scipy.__version__).split('.')[:2])

File diff suppressed because it is too large Load diff

View file

@ -1,3 +1,4 @@
from libc.math cimport sqrt
cimport numpy as np
from cpython cimport bool
import cython
@ -19,16 +20,18 @@ def symmetrify(np.ndarray[double, ndim=2] A, bool upper):
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True)
def cholupdate(np.ndarray[double, ndim=1] x, np.ndarray[double, ndim=2] L, int N):
cdef double r
cdef double c
cdef double s
for j in xrange(N):
r = np.sqrt(L[j,j]*L[j,j] + x[j]*x[j])
c = r / L[j,j]
s = x[j] / L[j,j]
L[j,j] = r
for i in xrange(j):
L[i,j] = (L[i,j] + s*x[i])/c
x[i] = c*x[i] - s*L[i,j];
r = np.sqrt(L[j,j])
cdef double r, c, s
cdef int j, i
with nogil:
for j in xrange(N):
r = sqrt(L[j, j] * L[j, j] + x[j] * x[j])
c = r / L[j, j]
s = x[j] / L[j, j]
L[j, j] = r
for i in xrange(j):
L[i, j] = (L[i, j] + s * x[i]) / c
x[i] = c * x[i] - s * L[i, j]
r = sqrt(L[j, j])

View file

@ -8,6 +8,7 @@ from scipy.special import ndtr as std_norm_cdf
#define a standard normal pdf
_sqrt_2pi = np.sqrt(2*np.pi)
def std_norm_pdf(x):
x = np.clip(x,-1e150,1e150)
return np.exp(-np.square(x)/2)/_sqrt_2pi
def inv_std_norm_cdf(x):