merge the changes

commit fc0e8f3e7e
Fariba, 2015-07-27 17:42:01 +01:00
66 changed files with 55761 additions and 702 deletions

View file

@ -18,7 +18,8 @@ before_install:
install:
- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
- pip install .
#- pip install .
- python setup.py build_ext --inplace
#--use-mirrors
#
# command to run tests, e.g. python setup.py test

View file

@ -89,7 +89,6 @@ class GP(Model):
assert mean_function.output_dim == self.output_dim
self.link_parameter(mean_function)
#find a sensible inference method
logger.info("initializing inference method")
if inference_method is None:
@ -208,6 +207,7 @@ class GP(Model):
Kxx = kern.Kdiag(_Xnew)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
var[var<0.] = 0.
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]
@ -229,13 +229,14 @@ class GP(Model):
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses.
:returns: (mean, var, lower_upper):
:returns: (mean, var):
mean: posterior mean, a Numpy array, Nnew x self.input_dim
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
lower_upper: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:`~GPy.core.gp.GP.predict_quantiles`.
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
@ -243,7 +244,7 @@ class GP(Model):
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
return mean, var
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
@ -255,12 +256,12 @@ class GP(Model):
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
:type quantiles: tuple
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
def predictive_gradients(self, Xnew):
"""
@ -330,7 +331,7 @@ class GP(Model):
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
Ysim = self.likelihood.samples(fsim, Y_metadata)
Ysim = self.likelihood.samples(fsim, Y_metadata=Y_metadata)
return Ysim
def plot_f(self, plot_limits=None, which_data_rows='all',
@ -395,7 +396,7 @@ class GP(Model):
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -444,7 +445,7 @@ class GP(Model):
which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, **kw)
data_symbol=data_symbol, predict_kw=predict_kw, **kw)
def input_sensitivity(self, summarize=True):
"""
@ -472,16 +473,51 @@ class GP(Model):
self.inference_method.on_optimization_end()
raise
def infer_newX(self, Y_new, optimize=True, ):
def infer_newX(self, Y_new, optimize=True):
"""
Infer the distribution of X for the new observed data *Y_new*.
Infer X for the new observed data *Y_new*.
:param Y_new: the new observed data for inference
:type Y_new: numpy.ndarray
:param optimize: whether to optimize the location of new X (True by default)
:type optimize: boolean
:return: a tuple containing the posterior estimation of X and the model that optimize X
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` and numpy.ndarray, :class:`~GPy.core.model.Model`)
"""
from ..inference.latent_function_inference.inferenceX import infer_newX
return infer_newX(self, Y_new, optimize=optimize)
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
"""
Calculation of the log predictive density
.. math::
p(y_{*}|D) = \int p(y_{*}|f_{*})\, p(f_{*}|\mu_{*}, \sigma^{2}_{*})\, df_{*}
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param Y_metadata: metadata associated with the test points
"""
mu_star, var_star = self._raw_predict(x_test)
return self.likelihood.log_predictive_density(y_test, mu_star, var_star, Y_metadata=Y_metadata)
def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_samples=1000):
"""
Calculation of the log predictive density by sampling
.. math::
p(y_{*}|D) = \int p(y_{*}|f_{*})\, p(f_{*}|\mu_{*}, \sigma^{2}_{*})\, df_{*}
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param Y_metadata: metadata associated with the test points
:param num_samples: number of samples to use in Monte Carlo integration
:type num_samples: int
"""
mu_star, var_star = self._raw_predict(x_test)
return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
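As a rough illustration of the updated prediction API above (an editorial sketch, not part of the diff): predict now returns only (mean, var), the 95% interval comes from predict_quantiles, and the new log_predictive_density methods score held-out data. Model and data names below are illustrative.

import numpy as np
import GPy

X = np.random.rand(50, 1)
Y = np.sin(6 * X) + 0.1 * np.random.randn(50, 1)
m = GPy.models.GPRegression(X, Y)
m.optimize()

Xtest = np.linspace(0, 1, 10)[:, None]
mean, var = m.predict(Xtest)                              # (mean, var) only
lower, upper = m.predict_quantiles(Xtest)                 # 95% interval, Nnew x output_dim arrays
lpd = m.log_predictive_density(Xtest, np.sin(6 * Xtest))  # per-point log predictive density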

View file

@ -76,7 +76,7 @@ class Model(Parameterized):
jobs = []
pool = mp.Pool(processes=num_processes)
for i in range(num_restarts):
self.randomize()
if i>0: self.randomize()
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job)
@ -90,7 +90,7 @@ class Model(Parameterized):
for i in range(num_restarts):
try:
if not parallel:
self.randomize()
if i>0: self.randomize()
self.optimize(**kwargs)
else:
self.optimization_runs.append(jobs[i].get())
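A short sketch of the behavioural change above (illustrative names): optimize_restarts now leaves the first run at the model's current parameters and only randomizes subsequent restarts.

import numpy as np
import GPy

X = np.random.rand(30, 1)
Y = np.sin(X) + 0.05 * np.random.randn(30, 1)
m = GPy.models.GPRegression(X, Y)
m.optimize_restarts(num_restarts=3)  # restart 0 keeps the current parameters, restarts 1-2 are randomized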

View file

@ -5,6 +5,7 @@ import numpy
from numpy.lib.function_base import vectorize
from .lists_and_dicts import IntArrayDict
from functools import reduce
from .transformations import Transformation
def extract_properties_to_index(index, props):
prop_index = dict()

View file

@ -38,6 +38,11 @@ class Param(Parameterizable, ObsAr):
Fixing parameters will fix them to the value they are right now. If you change
the fixed value, it will be fixed to the new value!
Important Note:
Multilevel indexing (e.g. self[:2][1:]) is not supported and might lead to unexpected behaviour.
Try to index in one go, using boolean indexing or the numpy builtin
np.index_exp helper.
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
"""

View file

@ -430,23 +430,38 @@ class Indexable(Nameable, Updateable):
def log_prior(self):
"""evaluate the prior"""
if self.priors.size > 0:
x = self.param_array
#py3 fix
#return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
return 0.
if self.priors.size == 0:
return 0.
x = self.param_array
#evaluate the prior log densities
log_p = reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
#account for the transformation by evaluating the log Jacobian (where things are transformed)
log_j = 0.
priored_indexes = np.hstack([i for p, i in self.priors.items()])
for c,j in self.constraints.items():
if not isinstance(c, Transformation):continue
for jj in j:
if jj in priored_indexes:
log_j += c.log_jacobian(x[jj])
return log_p + log_j
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
if self.priors.size > 0:
x = self.param_array
ret = np.zeros(x.size)
#py3 fix
#[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
return ret
return 0.
if self.priors.size == 0:
return 0.
x = self.param_array
ret = np.zeros(x.size)
#compute derivative of prior density
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
#add in jacobian derivatives if transformed
priored_indexes = np.hstack([i for p, i in self.priors.items()])
for c,j in self.constraints.items():
if not isinstance(c, Transformation):continue
for jj in j:
if jj in priored_indexes:
ret[jj] += c.log_jacobian_grad(x[jj])
return ret
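A minimal numeric sketch of the Jacobian correction introduced above, assuming a Logexp-constrained (positive) parameter with a prior placed on the constrained value; the standard-normal prior here is arbitrary and used only for illustration.

import numpy as np

def logexp(eta):
    # GPy's Logexp transformation: theta = log(1 + exp(eta))
    return np.log1p(np.exp(eta))

eta = 0.3
theta = logexp(eta)
# dtheta/deta = exp(eta)/(1 + exp(eta)) = 1 - exp(-theta), so
log_jacobian = np.log(np.exp(theta) - 1.) - theta          # matches Logexp.log_jacobian added later in this commit
log_prior_constrained = -0.5 * theta**2 - 0.5 * np.log(2 * np.pi)
log_prior_total = log_prior_constrained + log_jacobian     # what log_prior now returns for this parameter
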
#===========================================================================
# Tie parameters together

View file

@ -9,7 +9,7 @@ from .param import ParamConcatenation
from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging
from GPy.core.parameterization.index_operations import ParameterIndexOperationsView
from .index_operations import ParameterIndexOperationsView
logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type):

View file

@ -522,16 +522,9 @@ class DGPLVM(Prior):
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __new__(cls, sigma2, lbl, x_shape):
return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
def __init__(self, sigma2, lbl, x_shape):
self.sigma2 = sigma2
@ -730,7 +723,7 @@ class DGPLVM(Prior):
# ******************************************
from parameterized import Parameterized
from .. import Parameterized
from .. import Param
class DGPLVM_Lamda(Prior, Parameterized):
"""
@ -758,12 +751,12 @@ class DGPLVM_Lamda(Prior, Parameterized):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.lamda = lamda
self.lamda = lamda
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
self.lamda = Param('lamda', np.diag(lamda))
self.lamda = Param('lamda', np.diag(lamda))
self.link_parameter(self.lamda)
def get_class_label(self, y):
@ -789,7 +782,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
class_i = cls[i]
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
@ -899,8 +892,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@ -916,8 +909,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@ -951,14 +944,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
DPxprim_Dx = DPxprim_Dx.T
DPxprim_Dlamda = DPx_Dx.dot(x)
DPxprim_Dlamda = DPx_Dx.dot(x)
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
DPxprim_Dlamda = DPxprim_Dlamda.T
DPxprim_Dlamda = DPxprim_Dlamda.T
self.lamda.gradient = np.diag(DPxprim_Dlamda)
self.lamda.gradient = np.diag(DPxprim_Dlamda)
# print DPxprim_Dx
return DPxprim_Dx
return DPxprim_Dx
# def frb(self, x):
@ -1139,8 +1132,8 @@ class DGPLVM_T(Prior):
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@ -1156,11 +1149,11 @@ class DGPLVM_T(Prior):
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)

View file

@ -31,6 +31,16 @@ class Transformation(object):
raise NotImplementedError
def finv(self, model_param):
raise NotImplementedError
def log_jacobian(self, model_param):
"""
compute the log of the jacobian of f, evaluated at f(x)= model_param
"""
raise NotImplementedError
def log_jacobian_grad(self, model_param):
"""
compute the derivative of the log of the jacobian of f, evaluated at f(x) = model_param
"""
raise NotImplementedError
def gradfactor(self, model_param, dL_dmodel_param):
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
@ -74,9 +84,33 @@ class Logexp(Transformation):
if np.any(f < 0.):
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def log_jacobian(self, model_param):
return np.where(model_param>_lim_val, model_param, np.log(np.exp(model_param+1e-20) - 1.)) - model_param
def log_jacobian_grad(self, model_param):
return 1./(np.exp(model_param)-1.)
def __str__(self):
return '+ve'
class Exponent(Transformation):
domain = _POSITIVE
def f(self, x):
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
if np.any(f < 0.):
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def log_jacobian(self, model_param):
return np.log(model_param)
def log_jacobian_grad(self, model_param):
return 1./model_param
def __str__(self):
return '+ve'
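A small hedged check of the Jacobian terms added to Exponent above: with theta = f(x) = exp(x), dtheta/dx equals theta itself, so log_jacobian(theta) = log(theta) and its derivative with respect to theta is 1/theta.

import numpy as np

x = 1.7
theta = np.exp(x)
assert np.isclose(np.log(theta), x)        # log|dtheta/dx| evaluated at theta = exp(x)
assert np.isclose(1. / theta, np.exp(-x))  # derivative of log_jacobian w.r.t. theta
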
class NormalTheta(Transformation):
"Do not use, not officially supported!"
@ -417,22 +451,6 @@ class LogexpClipped(Logexp):
def __str__(self):
return '+ve_c'
class Exponent(Transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
domain = _POSITIVE
def f(self, x):
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
if np.any(f < 0.):
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
def __str__(self):
return '+ve'
class NegativeExponent(Exponent):
domain = _NEGATIVE
def f(self, x):

View file

@ -36,8 +36,9 @@ class NormalPrior(VariationalPrior):
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
class SpikeAndSlabPrior(VariationalPrior):
def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw):
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
def __init__(self, pi=None, learnPi=False, variance = 1.0, group_spike=False, name='SpikeAndSlabPrior', **kw):
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
self.group_spike = group_spike
self.variance = Param('variance',variance)
self.learnPi = learnPi
if learnPi:
@ -50,7 +51,10 @@ class SpikeAndSlabPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.gamma.values
if self.group_spike:
gamma = variational_posterior.gamma.values[0]
else:
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
@ -65,14 +69,21 @@ class SpikeAndSlabPrior(VariationalPrior):
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.gamma.values
if self.group_spike:
gamma = variational_posterior.gamma.values[0]
else:
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
if self.group_spike:
dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))/variational_posterior.num_data
else:
dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi:
@ -154,13 +165,31 @@ class SpikeAndSlabPosterior(VariationalPosterior):
'''
The SpikeAndSlab distribution for variational approximations.
'''
def __init__(self, means, variances, binary_prob, name='latent space'):
def __init__(self, means, variances, binary_prob, group_spike=False, sharedX=False, name='latent space'):
"""
binary_prob : the probability of the distribution on the slab part.
"""
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
self.link_parameter(self.gamma)
self.group_spike = group_spike
self.sharedX = sharedX
if sharedX:
self.mean.fix(warning=False)
self.variance.fix(warning=False)
if group_spike:
self.gamma_group = Param("binary_prob_group",binary_prob.mean(axis=0),Logistic(1e-10,1.-1e-10))
self.gamma = Param("binary_prob",binary_prob, __fixed__)
self.link_parameters(self.gamma_group,self.gamma)
else:
self.gamma = Param("binary_prob",binary_prob,Logistic(1e-10,1.-1e-10))
self.link_parameter(self.gamma)
def propogate_val(self):
if self.group_spike:
self.gamma.values[:] = self.gamma_group.values
def collate_gradient(self):
if self.group_spike:
self.gamma_group.gradient = self.gamma.gradient.reshape(self.gamma.shape).sum(axis=0)
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
@ -179,15 +208,15 @@ class SpikeAndSlabPosterior(VariationalPosterior):
n.parameters[dc['variance']._parent_index_] = dc['variance']
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size
n.size = n.mean.size + n.variance.size + oversize
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size
n.size = n.mean.size + n.variance.size + n.gamma.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(VariationalPrior, self).__getitem__(s)
return super(SpikeAndSlabPosterior, self).__getitem__(s)
def plot(self, *args, **kwargs):
"""

View file

@ -132,14 +132,14 @@ class SparseGP(GP):
if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(Xnew)
if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
#For plot_latent, the below code doesn't work!
#var = Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0)
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
@ -149,9 +149,9 @@ class SparseGP(GP):
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
else:
psi0_star = self.kern.psi0(self.Z, Xnew)
psi1_star = self.kern.psi1(self.Z, Xnew)
#psi2_star = self.kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
psi0_star = kern.psi0(self.Z, Xnew)
psi1_star = kern.psi1(self.Z, Xnew)
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
la = self.posterior.woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions?
@ -163,7 +163,7 @@ class SparseGP(GP):
for i in range(Xnew.shape[0]):
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
psi2_star = self.kern.psi2(self.Z, NormalPosterior(_mu, _var))
psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
var_ = mdot(la.T, tmp, la)

View file

@ -9,7 +9,7 @@ from ..inference.latent_function_inference import SVGP as svgp_inf
class SVGP(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None):
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None, num_latent_functions=None):
"""
Stochastic Variational GP.
@ -41,8 +41,12 @@ class SVGP(SparseGP):
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method,
name=name, Y_metadata=Y_metadata, normalizer=False)
self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1])))
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,Y.shape[1])))
#assume the number of latent functions is one per col of Y unless specified
if num_latent_functions is None:
num_latent_functions = Y.shape[1]
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[None,:,:], (num_latent_functions, 1,1)))
self.chol = Param('q_u_chol', chol)
self.link_parameter(self.chol)
self.link_parameter(self.m)

View file

@ -5,9 +5,10 @@ from __future__ import print_function
import numpy as np
import sys
import time
import datetime
def exponents(fnow, current_grad):
exps = [np.abs(np.float(fnow)), current_grad]
exps = [np.abs(np.float(fnow)), 1 if current_grad is np.nan else current_grad]
return np.sign(exps) * np.log10(exps).astype(int)
class VerboseOptimization(object):
@ -23,6 +24,7 @@ class VerboseOptimization(object):
self.model.add_observer(self, self.print_status)
self.status = 'running'
self.clear = clear_after_finish
self.deltat = .2
self.update()
@ -44,25 +46,25 @@ class VerboseOptimization(object):
self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
display(self.hor_align)
try:
self.text.set_css('width', '100%')
left_col.set_css({
'padding': '2px',
'width': "100%",
})
right_col.set_css({
'padding': '2px',
})
self.hor_align.set_css({
'width': "100%",
})
self.hor_align.remove_class('vbox')
self.hor_align.add_class('hbox')
left_col.add_class("box-flex1")
right_col.add_class('box-flex0')
@ -74,16 +76,31 @@ class VerboseOptimization(object):
else:
self.exps = exponents(self.fnow, self.current_gradient)
print('Running {} Code:'.format(self.opt_name))
print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters))
print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "runtime", mi=self.len_maxiters))
def __enter__(self):
self.start = time.time()
return self
def print_out(self):
def print_out(self, seconds):
if seconds<60:
ms = (seconds%1)*100
self.timestring = "{s:0>2d}s{ms:0>2d}".format(s=int(seconds), ms=int(ms))
else:
m, s = divmod(seconds, 60)
if m>59:
h, m = divmod(m, 60)
if h>23:
d, h = divmod(h, 24)
self.timestring = '{d:0>2d}d{h:0>2d}h{m:0>2d}'.format(m=int(m), h=int(h), d=int(d))
else:
self.timestring = '{h:0>2d}h{m:0>2d}m{s:0>2d}'.format(m=int(m), s=int(s), h=int(h))
else:
ms = (seconds%1)*100
self.timestring = '{m:0>2d}m{s:0>2d}s{ms:0>2d}'.format(m=int(m), s=int(s), ms=int(ms))
if self.ipython_notebook:
names_vals = [['optimizer', "{:s}".format(self.opt_name)],
['runtime [s]', "{:> g}".format(time.time()-self.start)],
['runtime', "{:>s}".format(self.timestring)],
['evaluation', "{:>0{l}}".format(self.iteration, l=self.len_maxiters)],
['objective', "{: > 12.3E}".format(self.fnow)],
['||gradient||', "{: >+12.3E}".format(float(self.current_gradient))],
@ -120,14 +137,18 @@ class VerboseOptimization(object):
if b:
self.exps = n_exps
print('\r', end=' ')
print('{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
print('{3:} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), "{:>8s}".format(self.timestring), mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
def print_status(self, me, which=None):
self.update()
seconds = time.time()-self.start
#sys.stdout.write(" "*len(self.message))
self.print_out()
self.deltat += seconds
if self.deltat > .2:
self.print_out(seconds)
self.deltat = 0
self.iteration += 1
@ -141,17 +162,24 @@ class VerboseOptimization(object):
def finish(self, opt):
self.status = opt.status
if self.verbose and self.ipython_notebook:
if 'conv' in self.status.lower():
self.progress.bar_style = 'success'
elif self.iteration >= self.maxiters:
self.progress.bar_style = 'warning'
else:
self.progress.bar_style = 'danger'
def __exit__(self, type, value, traceback):
if self.verbose:
self.stop = time.time()
self.model.remove_observer(self)
self.print_out()
self.print_out(self.stop - self.start)
if not self.ipython_notebook:
print()
print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start))
print('Optimization status: {0}'.format(self.status))
print('Runtime: {}'.format("{:>9s}".format(self.timestring)))
print('Optimization status: {0}'.format(self.status))
print()
elif self.clear:
self.hor_align.close()
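The runtime formatting added to print_out can be read as the standalone helper below (an editorial re-statement, not code from the diff): seconds are rendered as "42s50", "02m05s30", "01h02m03" or "01d02h03" style strings depending on magnitude.

def format_runtime(seconds):
    if seconds < 60:
        return "{s:0>2d}s{ms:0>2d}".format(s=int(seconds), ms=int((seconds % 1) * 100))
    m, s = divmod(seconds, 60)
    if m > 59:
        h, m = divmod(m, 60)
        if h > 23:
            d, h = divmod(h, 24)
            return '{d:0>2d}d{h:0>2d}h{m:0>2d}'.format(m=int(m), h=int(h), d=int(d))
        return '{h:0>2d}h{m:0>2d}m{s:0>2d}'.format(m=int(m), s=int(s), h=int(h))
    return '{m:0>2d}m{s:0>2d}s{ms:0>2d}'.format(m=int(m), s=int(s), ms=int((seconds % 1) * 100))

print(format_runtime(42.5))   # '42s50'
print(format_runtime(125.3))  # '02m05s30'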

View file

@ -25,3 +25,6 @@ MKL = False
[weave]
#if true, try to use weave, and fall back to numpy. if false, just use numpy.
working = True
[cython]
working = True
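The new [cython] flag is consulted the same way the existing [weave] flag is; a hedged sketch of reading it (the file name below is a placeholder, not the actual config path used by GPy):

try:
    from configparser import ConfigParser   # Python 3
except ImportError:
    from ConfigParser import ConfigParser   # Python 2

config = ConfigParser()
config.read('installation.cfg')              # placeholder path for this config file
if config.getboolean('cython', 'working'):
    use_cython_kernels = True                # e.g. stationary_cython / coregionalize_cython
else:
    use_cython_kernels = False               # fall back to the pure numpy implementations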

View file

@ -217,9 +217,8 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
elif model_type == 'FITC':
m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
m['.*len'] = 3.
if optimize:
m.pseudo_EM()
m.optimize()
if plot:
m.plot()

View file

@ -215,6 +215,7 @@ def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40
return m
def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
"""Simulate some data drawn from a matern covariance and a periodic exponential for use in MRD demos."""
Q_signal = 4
import GPy
import numpy as np
@ -254,6 +255,7 @@ def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
return slist, [S1, S2, S3], Ylist
def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
"""Simulate some data drawn from sine and cosine for use in demos of MRD"""
_np.random.seed(1234)
x = _np.linspace(0, 4 * _np.pi, N)[:, None]
@ -353,13 +355,13 @@ def ssgplvm_simulation(optimize=True, verbose=1,
Y = Ylist[0]
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k)
m = SSGPLVM(Y, Q, init="rand", num_inducing=num_inducing, kernel=k, group_spike=True)
m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
m.likelihood.variance = .1
m.likelihood.variance = .01
if optimize:
print("Optimizing model:")
m.optimize('scg', messages=verbose, max_iters=max_iters,
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05)
if plot:
m.X.plot("SSGPLVM Latent Space 1D")
@ -402,7 +404,8 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy.models import MRD
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim)
# Ylist = [Ylist[0]]
k = kern.Linear(Q, ARD=True)
@ -585,6 +588,7 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
return m
def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
"""Interactive visualisation of the Stick Man data from Ohio State University with the Bayesian GPLVM."""
from GPy.models import BayesianGPLVM
from matplotlib import pyplot as plt
import numpy as np
@ -613,7 +617,8 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
fig.canvas.draw()
fig.canvas.show()
# Canvas.show doesn't work on OSX.
#fig.canvas.show()
raw_input('Press enter to finish')
return m

View file

@ -52,7 +52,7 @@ class ExactGaussianInference(LatentFunctionInference):
K = kern.K(X)
Ky = K.copy()
diag.add(Ky, likelihood.gaussian_variance(Y_metadata))
diag.add(Ky, likelihood.gaussian_variance(Y_metadata)+1e-8)
Wi, LW, LWi, W_logdet = pdinv(Ky)
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
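A small hedged illustration of why the 1e-8 jitter above helps: it keeps K + noise*I numerically positive definite before the Cholesky factorisation inside pdinv. Sizes and values are illustrative.

import numpy as np

A = np.random.randn(20, 5)
K = A.dot(A.T)                      # rank-deficient PSD kernel matrix
noise = 1e-10                       # a nearly noise-free Gaussian likelihood
Ky = K + (noise + 1e-8) * np.eye(20)
np.linalg.cholesky(Ky)              # the extra 1e-8 on the diagonal keeps the factorisation stable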

View file

@ -27,12 +27,19 @@ def infer_newX(model, Y_new, optimize=True, init='L2'):
class InferenceX(Model):
"""
The class for inference of new X with given new Y. (do_test_latent)
The model class for inference of new X with given new Y. (replacing the "do_test_latent" in Bayesian GPLVM)
It is a tiny inference model created from the original GP model. The kernel, likelihood (only Gaussian is supported at the moment)
and posterior distribution are taken from the original model.
For Regression models and GPLVM, a point estimate of the latent variable X will be inferred.
For Bayesian GPLVM, the variational posterior of X will be inferred.
X is inferred through a gradient optimization of the inference model.
:param model: the GPy model used in inference
:type model: GPy.core.Model
:param Y: the new observed data for inference
:type Y: numpy.ndarray
:param init: the distance metric of Y for initializing X with the nearest neighbour.
:type init: 'L2', 'NCC' and 'rand'
"""
def __init__(self, model, Y, name='inferenceX', init='L2'):
if np.isnan(Y).any() or getattr(model, 'missing_data', False):
@ -45,17 +52,23 @@ class InferenceX(Model):
super(InferenceX, self).__init__(name)
self.likelihood = model.likelihood.copy()
self.kern = model.kern.copy()
if model.kern.useGPU:
from ...models import SSGPLVM
if isinstance(model, SSGPLVM):
self.kern.GPU_SSRBF(True)
else:
self.kern.GPU(True)
# if model.kern.useGPU:
# from ...models import SSGPLVM
# if isinstance(model, SSGPLVM):
# self.kern.GPU_SSRBF(True)
# else:
# self.kern.GPU(True)
from copy import deepcopy
self.posterior = deepcopy(model.posterior)
if hasattr(model, 'variational_prior'):
self.uncertain_input = True
self.variational_prior = model.variational_prior.copy()
from ...models.ss_gplvm import IBPPrior
from ...models.ss_mrd import IBPPrior_SSMRD
if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
from ...core.parameterization.variational import SpikeAndSlabPrior
self.variational_prior = SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
else:
self.variational_prior = model.variational_prior.copy()
else:
self.uncertain_input = False
if hasattr(model, 'inducing_inputs'):
@ -147,9 +160,9 @@ class InferenceX(Model):
from ...core.parameterization.variational import SpikeAndSlabPrior
if isinstance(self.variational_prior, SpikeAndSlabPrior):
# Update Log-likelihood
KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0])
KL_div = self.variational_prior.KL_divergence(self.X)
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0])
self.variational_prior.update_gradients_KL(self.X)
else:
# Update Log-likelihood
KL_div = self.variational_prior.KL_divergence(self.X)
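A hedged usage sketch of infer_newX on a trained Bayesian GPLVM (data and sizes below are illustrative):

import numpy as np
import GPy

Ytrain = np.random.randn(40, 8)
m = GPy.models.BayesianGPLVM(Ytrain, input_dim=2)
m.optimize(max_iters=50)

Ynew = np.random.randn(5, 8)
x_new, inference_model = m.infer_newX(Ynew)  # variational posterior over X for the new rows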

View file

@ -139,10 +139,6 @@ class Laplace(LatentFunctionInference):
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat
#self.Ki_fhat = Ki_fhat
#self.K = K.copy()
#Compute hessian and other variables at mode
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
@ -298,6 +294,11 @@ class Laplace(LatentFunctionInference):
else:
dL_dthetaL = np.zeros(likelihood.size)
#Cache some things for speedy LOO
self.Ki_W_i = Ki_W_i
self.K = K
self.W = W
self.f_hat = f_hat
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):

View file

@ -3,25 +3,27 @@ from ...util import linalg
from ...util import choleskies
import numpy as np
from .posterior import Posterior
from scipy.linalg.blas import dgemm, dsymm, dtrmm
class SVGP(LatentFunctionInference):
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
num_inducing = Z.shape[0]
num_data, num_outputs = Y.shape
num_data, _ = Y.shape
num_inducing, num_outputs = q_u_mean.shape
#expand cholesky representation
L = choleskies.flat_to_triang(q_u_chol)
S = np.einsum('ijk,ljk->ilk', L, L) #L.dot(L.T)
S = np.empty((num_outputs, num_inducing, num_inducing))
[np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:]) for i in range(num_outputs)]
#Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
Si = choleskies.multiple_dpotri(L)
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])
if np.any(np.isinf(Si)):
raise ValueError("Cholesky representation unstable")
#S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
#Si, Lnew, _,_ = linalg.pdinv(S)
#compute mean function stuff
if mean_function is not None:
@ -31,26 +33,31 @@ class SVGP(LatentFunctionInference):
prior_mean_u = np.zeros((num_inducing, num_outputs))
prior_mean_f = np.zeros((num_data, num_outputs))
#compute kernel related stuff
Kmm = kern.K(Z)
Knm = kern.K(X, Z)
Kmn = kern.K(Z, X)
Knn_diag = kern.Kdiag(X)
Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
Lm = linalg.jitchol(Kmm)
logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
Kmmi, _ = linalg.dpotri(Lm)
#compute the marginal means and variances of q(f)
A = np.dot(Knm, Kmmi)
mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jkl->ikl', A, S),1)
A, _ = linalg.dpotrs(Lm, Kmn)
mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
v = np.empty((num_data, num_outputs))
for i in range(num_outputs):
tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
v[:,i] = np.sum(np.square(tmp),0)
v += (Knn_diag - np.sum(A*Kmn,0))[:,None]
#compute the KL term
Kmmim = np.dot(Kmmi, q_u_mean)
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
KL = KLs.sum()
#gradient of the KL term (assuming zero mean function)
dKL_dm = Kmmim.copy()
dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
dKL_dS = 0.5*(Kmmi[None,:,:] - Si)
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(0)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
if mean_function is not None:
#adjust KL term for mean function
@ -75,14 +82,20 @@ class SVGP(LatentFunctionInference):
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale
#derivatives of expected likelihood, assuming zero mean function
Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
Admu = A.T.dot(dF_dmu)
AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
Adv = A[None,:,:]*dF_dv.T[:,None,:] # As if dF_Dv is diagonal, D, M, N
Admu = A.dot(dF_dmu)
Adv = np.ascontiguousarray(Adv) # makes for faster operations later...(inc dsymm)
AdvA = np.dot(Adv.reshape(-1, num_data),A.T).reshape(num_outputs, num_inducing, num_inducing )
tmp = np.sum([np.dot(a,s) for a, s in zip(AdvA, S)],0).dot(Kmmi)
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(0) - tmp - tmp.T
dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
tmp = S.reshape(-1, num_inducing).dot(Kmmi).reshape(num_outputs, num_inducing , num_inducing )
tmp = 2.*(tmp - np.eye(num_inducing)[None, :,:])
dF_dKmn = Kmmim.dot(dF_dmu.T)
for a,b in zip(tmp, Adv):
dF_dKmn += np.dot(a.T, b)
dF_dm = Admu
dF_dS = AdvA
@ -98,11 +111,11 @@ class SVGP(LatentFunctionInference):
log_marginal = F.sum() - KL
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
dL_dchol = 2.*np.array([np.dot(a,b) for a, b in zip(dL_dS, L) ])
dL_dchol = choleskies.triang_to_flat(dL_dchol)
grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
if mean_function is not None:
grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
grad_dict['dL_dmfX'] = dF_dmfX
return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
return Posterior(mean=q_u_mean, cov=S.T, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
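A hedged sketch of the reshaped covariance bookkeeping above: the flat Cholesky parameters now expand to one (M, M) triangle per output, and S[i] = L[i] L[i]^T is built with an explicit per-output dot rather than a single large einsum. Sizes are illustrative.

import numpy as np

num_outputs, M = 3, 4
L = np.tril(np.random.randn(num_outputs, M, M))  # stand-in for choleskies.flat_to_triang(q_u_chol)
S = np.empty((num_outputs, M, M))
for i in range(num_outputs):
    np.dot(L[i], L[i].T, out=S[i])               # S[i] = L[i] L[i]^T
assert np.allclose(S, np.einsum('dij,dkj->dik', L, L))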

View file

@ -142,7 +142,7 @@ class opt_lbfgsb(Optimizer):
#a more helpful error message is available in opt_result in the Error case
if opt_result[2]['warnflag']==2:
self.status = 'Error' + opt_result[2]['task']
self.status = 'Error' + str(opt_result[2]['task'])
class opt_simplex(Optimizer):
def __init__(self, *args, **kwargs):

View file

@ -19,5 +19,5 @@ from ._src.splitKern import SplitKern,DEtime
from ._src.splitKern import DEtime as DiffGenomeKern
from _src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

View file

@ -121,7 +121,7 @@ class LinearSlopeBasisFuncKernel(BasisFuncKernel):
class ChangePointBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
self.changepoint = changepoint
self.changepoint = np.array(changepoint)
super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=())
@ -136,3 +136,48 @@ class DomainKernel(LinearSlopeBasisFuncKernel):
def _phi(self, X):
phi = np.where((X>self.start)*(X<self.stop), 1, 0)
return phi#((phi-self.start)/(self.stop-self.start))-.5
class LogisticBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, centers, variance=1., slope=1., active_dims=None, ARD=False, ARD_slope=True, name='logistic'):
self.centers = np.atleast_2d(centers)
self.ARD_slope = ARD_slope
if self.ARD_slope:
self.slope = Param('slope', slope * np.ones(self.centers.size), Logexp())
else:
self.slope = Param('slope', slope, Logexp())
super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
self.link_parameter(self.slope)
@Cache_this(limit=3, ignore_args=())
def _phi(self, X):
import scipy as sp
phi = 1/(1+np.exp(-((X-self.centers)*self.slope)))
return np.where(np.isnan(phi), 0, phi)#((phi-self.start)/(self.stop-self.start))-.5
def parameters_changed(self):
BasisFuncKernel.parameters_changed(self)
def update_gradients_full(self, dL_dK, X, X2=None):
super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
if X2 is None or X is X2:
phi1 = self.phi(X)
if phi1.ndim != 2:
phi1 = phi1[:, None]
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
if self.ARD_slope:
self.slope.gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_dl)
else:
self.slope.gradient = self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum()
else:
phi1 = self.phi(X)
phi2 = self.phi(X2)
if phi1.ndim != 2:
phi1 = phi1[:, None]
phi2 = phi2[:, None]
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
dphi2_dl = (phi2**2) * (np.exp(-((X2-self.centers)*self.slope)) * (X2-self.centers))
if self.ARD_slope:
self.slope.gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_dl) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_dl))
else:
self.slope.gradient = self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum()
self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
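A hedged usage sketch of the new LogisticBasisFuncKernel, assuming it is exported at GPy.kern alongside the other basis-function kernels (the diff above only adds the class itself); inputs are illustrative.

import numpy as np
import GPy

X = np.linspace(0, 10, 50)[:, None]
k = GPy.kern.LogisticBasisFuncKernel(1, centers=[2., 5., 8.])  # one sigmoid step per center
K = k.K(X)                                                     # covariance built from the logistic basis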

View file

@ -5,12 +5,8 @@ from .kern import Kern
import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assessing whether to use weave
try:
from scipy import weave
except ImportError:
config.set('weave', 'working', 'False')
from ...util.config import config # for assessing whether to use cython
from . import coregionalize_cython
class Coregionalize(Kern):
"""
@ -61,13 +57,8 @@ class Coregionalize(Kern):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
def K(self, X, X2=None):
if config.getboolean('weave', 'working'):
try:
return self._K_weave(X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
return self._K_numpy(X, X2)
if config.getboolean('cython', 'working'):
return self._K_cython(X, X2)
else:
return self._K_numpy(X, X2)
@ -80,36 +71,10 @@ class Coregionalize(Kern):
index2 = np.asarray(X2, dtype=np.int)
return self.B[index,index2.T]
def _K_weave(self, X, X2=None):
"""compute the kernel function using scipy.weave"""
index = np.asarray(X, dtype=np.int)
def _K_cython(self, X, X2=None):
if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<N; i++){
target[i+i*N] = B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] = B[index[i]+output_dim*index[j]];
target[i+j*N] = target[j+i*N];
}
}
"""
N, B, output_dim = index.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
else:
index2 = np.asarray(X2, dtype=np.int)
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
}
}
"""
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
return target
return coregionalize_cython.K_symmetric(self.B, np.asarray(X, dtype=np.int64)[:,0])
return coregionalize_cython.K_asymmetric(self.B, np.asarray(X, dtype=np.int64)[:,0], np.asarray(X2, dtype=np.int64)[:,0])
def Kdiag(self, X):
@ -122,19 +87,13 @@ class Coregionalize(Kern):
else:
index2 = np.asarray(X2, dtype=np.int)
#attempt to use weave for a nasty double indexing loop: fall back to numpy
if config.getboolean('weave', 'working'):
try:
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
#attempt to use cython for a nasty double indexing loop: fall back to numpy
if config.getboolean('cython', 'working'):
dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2)
else:
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
@ -142,19 +101,6 @@ class Coregionalize(Kern):
self.W.gradient = dW
self.kappa.gradient = dkappa
def _gradient_reduce_weave(self, dL_dK, index, index2):
dL_dK_small = np.zeros_like(self.B)
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
return dL_dK_small
def _gradient_reduce_numpy(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0]
dL_dK_small = np.zeros_like(self.B)
@ -164,6 +110,11 @@ class Coregionalize(Kern):
dL_dK_small[j,i] = tmp1[:,index2==j].sum()
return dL_dK_small
def _gradient_reduce_cython(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0]
return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten()
dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)])

File diff suppressed because it is too large.

View file

@ -0,0 +1,34 @@
#cython: boundscheck=True
#cython: wraparound=True
import cython
import numpy as np
cimport numpy as np
def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
cdef int N = X.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
for n in range(N):
for m in range(N):
K[n,m] = B[X[n],X[m]]
return K
def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
cdef int N = X.size
cdef int M = X2.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
for n in range(N):
for m in range(M):
K[n,m] = B[X[n],X2[m]]
return K
def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
cdef int N = index.size
cdef int M = index2.size
for i in range(N):
for j in range(M):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
return dL_dK_small
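A quick numpy cross-check of what the cython routines above compute (illustrative sizes): K_symmetric simply gathers B at the integer output indices, i.e. B[X][:, X].

import numpy as np

D, N = 3, 6
B = np.random.randn(D, D); B = B + B.T
X = np.random.randint(0, D, size=N)
K_np = B[X][:, X]            # entry (n, m) is B[X[n], X[m]], as in K_symmetric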

View file

@ -105,7 +105,7 @@ class IndependentOutputs(CombinationKernel):
if X2 is None:
# TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out
print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))

View file

@ -78,7 +78,7 @@ class MLP(Kern):
*((vec1[:, None]+vec2[None, :])*self.weight_variance
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
def update_gradients_diag(self, X):
def update_gradients_diag(self, dL_dKdiag, X):
self._K_diag_computations(X)
self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)

View file

@ -20,7 +20,6 @@ class RBF(Stationary):
_support_GPU = True
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
self.psicomp = PSICOMP_RBF()
if self.useGPU:
self.psicomp = PSICOMP_RBF_GPU()
else:
@ -36,6 +35,7 @@ class RBF(Stationary):
dc = super(RBF, self).__getstate__()
if self.useGPU:
dc['psicomp'] = PSICOMP_RBF()
dc['useGPU'] = False
return dc
def __setstate__(self, state):

View file

@ -9,13 +9,15 @@ from ...util.linalg import tdot
from ... import util
import numpy as np
from scipy import integrate
from ...util.config import config # for assessing whether to use weave
from ...util.config import config # for assessing whether to use cython
from ...util.caching import Cache_this
try:
from scipy import weave
from . import stationary_cython
except ImportError:
config.set('weave', 'working', 'False')
print('warning in stationary: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false')
class Stationary(Kern):
"""
@ -153,28 +155,18 @@ class Stationary(Kern):
(dL_dK), compute the gradient wrt the parameters of this kernel,
and store in the parameters object as e.g. self.variance.gradient
"""
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
self.variance.gradient = np.sum(self.K(X, X2)* dL_dK)/self.variance
#now the lengthscale gradient(s)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
if self.ARD:
#rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
#d = X[:, None, :] - X2[None, :, :]
#x_xl3 = np.square(d)
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
if config.getboolean('weave', 'working'):
try:
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])
if config.getboolean('cython', 'working'):
self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2)
else:
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in range(self.input_dim)])
self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2)
else:
r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@ -189,43 +181,27 @@ class Stationary(Kern):
dist = self._scaled_dist(X, X2).copy()
return 1./np.where(dist != 0., dist, np.inf)
def weave_lengthscale_grads(self, tmp, X, X2):
"""Use scipy.weave to compute derivatives wrt the lengthscales"""
def _lengthscale_grads_pure(self, tmp, X, X2):
return -np.array([np.sum(tmp * np.square(X[:,q:q+1] - X2[:,q:q+1].T)) for q in range(self.input_dim)])/self.lengthscale**3
def _lengthscale_grads_cython(self, tmp, X, X2):
N,M = tmp.shape
Q = X.shape[1]
if hasattr(X, 'values'):X = X.values
if hasattr(X2, 'values'):X2 = X2.values
Q = self.input_dim
X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
grads = np.zeros(self.input_dim)
code = """
double gradq;
for(int q=0; q<Q; q++){
gradq = 0;
for(int n=0; n<N; n++){
for(int m=0; m<M; m++){
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
}
}
grads(q) = gradq;
}
"""
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
stationary_cython.lengthscale_grads(N, M, Q, tmp, X, X2, grads)
return -grads/self.lengthscale**3
def gradients_X(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
"""
if config.getboolean('weave', 'working'):
try:
return self.gradients_X_weave(dL_dK, X, X2)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
return self.gradients_X_(dL_dK, X, X2)
if config.getboolean('cython', 'working'):
return self._gradients_X_cython(dL_dK, X, X2)
else:
return self.gradients_X_(dL_dK, X, X2)
return self._gradients_X_pure(dL_dK, X, X2)
def gradients_X_(self, dL_dK, X, X2=None):
def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
@ -235,54 +211,25 @@ class Stationary(Kern):
#The high-memory numpy way:
#d = X[:, None, :] - X2[None, :, :]
#ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
#grad = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
#the lower memory way with a loop
ret = np.empty(X.shape, dtype=np.float64)
grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])
ret /= self.lengthscale**2
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=grad[:,q])
return grad/self.lengthscale**2
return ret
def gradients_X_weave(self, dL_dK, X, X2=None):
def _gradients_X_cython(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
if X2 is None:
tmp = tmp + tmp.T
X2 = X
code = """
int n,m,d;
double retnd;
#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp(n,m)*(X(n,d)-X2(m,d));
}
ret(n,d) = retnd;
}
}
"""
if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
if hasattr(X2, 'values'):X2 = X2.values
ret = np.zeros(X.shape)
N,D = X.shape
N,M = tmp.shape
from scipy import weave
support_code = """
#include <omp.h>
#include <stdio.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
weave.inline(code, ['ret', 'N', 'D', 'M', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz, support_code=support_code, **weave_options)
return ret/self.lengthscale**2
X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
grad = np.zeros(X.shape)
stationary_cython.grad_X(X.shape[0], X.shape[1], X2.shape[0], X, X2, tmp, grad)
return grad/self.lengthscale**2
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
@ -290,6 +237,9 @@ class Stationary(Kern):
def input_sensitivity(self, summarize=True):
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

File diff suppressed because it is too large.

View file

@ -0,0 +1,59 @@
#cython: boundscheck=False
#cython: nonecheck=False
#cython: wraparound=False
import numpy as np
cimport numpy as np
from cython.parallel import prange
ctypedef np.float64_t DTYPE_t
cdef extern from "stationary_utils.h":
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
cdef extern from "stationary_utils.h":
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
def grad_X(int N, int D, int M,
np.ndarray[DTYPE_t, ndim=2] _X,
np.ndarray[DTYPE_t, ndim=2] _X2,
np.ndarray[DTYPE_t, ndim=2] _tmp,
np.ndarray[DTYPE_t, ndim=2] _grad):
cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data
cdef double *tmp = <double*> _tmp.data
cdef double *grad = <double*> _grad.data
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
cdef int n,d,nd,m
for nd in prange(N*D, nogil=True):
n = nd/D
d = nd%D
grad[n,d] = 0.0
for m in range(M):
grad[n,d] += tmp[n,m]*(X[n,d]-X2[m,d])
def lengthscale_grads_in_c(int N, int M, int Q,
np.ndarray[DTYPE_t, ndim=2] _tmp,
np.ndarray[DTYPE_t, ndim=2] _X,
np.ndarray[DTYPE_t, ndim=2] _X2,
np.ndarray[DTYPE_t, ndim=1] _grad):
cdef double *tmp = <double*> _tmp.data
cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data
cdef double *grad = <double*> _grad.data
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
cdef int q, n, m
cdef double gradq, dist
for q in range(Q):
grad[q] = 0.0
for n in range(N):
for m in range(M):
dist = X[n,q] - X2[m,q]
grad[q] += tmp[n,m]*dist*dist
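A hedged numpy reference for the routines above (small illustrative sizes): lengthscale_grads accumulates sum_{n,m} tmp[n,m] * (X[n,q] - X2[m,q])^2 per input dimension q, and grad_X accumulates sum_m tmp[n,m] * (X[n,d] - X2[m,d]) per point and dimension.

import numpy as np

N, M, Q = 5, 4, 2
tmp = np.random.randn(N, M)
X, X2 = np.random.randn(N, Q), np.random.randn(M, Q)
grad_l = np.einsum('nm,nmq->q', tmp, np.square(X[:, None, :] - X2[None, :, :]))  # lengthscale_grads
grad_X = np.einsum('nm,nmd->nd', tmp, X[:, None, :] - X2[None, :, :])            # grad_X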

View file

@ -0,0 +1,54 @@
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
double retnd;
int n,d,nd,m;
#pragma omp parallel for private(nd,n,d, retnd, m)
for(nd=0;nd<(D*N);nd++){
n = nd/D;
d = nd%D;
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp[n*M+m]*(X[nd]-X2[m*D+d]);
}
grad[nd] = retnd;
}
} //grad_X
void _lengthscale_grads_unsafe(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
int n,m,nm,q,nQ,mQ;
double dist;
#pragma omp parallel for private(n,m,nm,q,nQ,mQ,dist)
for(nm=0; nm<(N*M); nm++){
n = nm/M;
m = nm%M;
nQ = n*Q;
mQ = m*Q;
for(q=0; q<Q; q++){
dist = X[nQ+q]-X2[mQ+q];
grad[q] += tmp[nm]*dist*dist;
}
}
} //lengthscale_grads
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
int n,m,q;
double gradq, dist;
#pragma omp parallel for private(n,m, gradq, dist)
for(q=0; q<Q; q++){
gradq = 0;
for(n=0; n<N; n++){
for(m=0; m<M; m++){
dist = X[n*Q+q]-X2[m*Q+q];
gradq += tmp[n*M+m]*dist*dist;
}
}
grad[q] = gradq;
}
} //lengthscale_grads

View file

@ -0,0 +1,3 @@
#include <omp.h>
void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad);
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad);

View file

@ -41,6 +41,14 @@ class Likelihood(Parameterized):
self.log_concave = False
self.not_block_really = False
def request_num_latent_functions(self, Y):
"""
The likelihood should infer how many latent functions are needed for the likelihood
Default is the number of outputs
"""
return Y.shape[1]
def _gradients(self,partial):
return np.zeros(0)
@ -118,22 +126,57 @@ class Likelihood(Parameterized):
"""Generate a function which can be integrated
to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(fi_star):
#exponent = np.exp(-(1./(2*v))*np.square(m-f_star))
#exponent = np.exp(-(1./(2*vi))*np.square(mi-fi_star))
#from GPy.util.misc import safe_exp
#exponent = safe_exp(exponent)
#return self.pdf(f_star, y, y_m)*exponent
#res = safe_exp(self.logpdf(fi_star, yi, yi_m))*exponent
#More stable in the log space
return np.exp(self.logpdf(fi_star, yi, yi_m)
res = np.exp(self.logpdf(fi_star, yi, yi_m)
- 0.5*np.log(2*np.pi*vi)
- 0.5*np.square(mi-fi_star)/vi)
- 0.5*np.square(fi_star-mi)/vi)
if not np.isfinite(res):
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return res
return f
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
for yi, mi, vi, yi_m in zipped_values])
p_ystar = np.array(p_ystar).reshape(-1, 1)
p_ystar = np.array(p_ystar).reshape(*y_test.shape)
return np.log(p_ystar)
def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
"""
Calculation of the log predictive density via sampling
.. math:
\log p(y_{*}|D) \approx \log \frac{1}{S}\sum^{S}_{s=1} p(y_{*}|f_{*s}), \qquad f_{*s} \sim p(f_{*}|\mu_{*}, \sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
:param num_samples: num samples of p(f_{*}|mu_{*}, var_{*}) to take
:type num_samples: int
"""
assert y_test.shape==mu_star.shape
assert y_test.shape==var_star.shape
assert y_test.shape[1] == 1
#Take samples of p(f*|y)
#fi_samples = np.random.randn(num_samples)*np.sqrt(var_star) + mu_star
fi_samples = np.random.normal(mu_star, np.sqrt(var_star), size=(mu_star.shape[0], num_samples))
from scipy.misc import logsumexp
log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
log_p_ystar = np.array(log_p_ystar).reshape(*y_test.shape)
return log_p_ystar
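As a standalone illustration of the sampling estimator above (assuming a Gaussian likelihood so the pdf has a closed form; none of these names are GPy API):

import numpy as np
from scipy.misc import logsumexp   # moved to scipy.special in later SciPy

def mc_log_predictive_density(y_test, mu_star, var_star, noise_var, num_samples=1000):
    # draw f* ~ N(mu*, var*), then log p(y*|D) ~= -log S + logsumexp_s log p(y*|f*_s)
    f_samples = np.random.normal(mu_star, np.sqrt(var_star),
                                 size=(mu_star.shape[0], num_samples))
    log_p = -0.5*np.log(2*np.pi*noise_var) - 0.5*np.square(y_test - f_samples)/noise_var
    return (-np.log(num_samples) + logsumexp(log_p, axis=1)).reshape(*y_test.shape)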
def _moments_match_ep(self,obs,tau,v):
"""
Calculation of moments using quadrature
@ -170,9 +213,9 @@ class Likelihood(Parameterized):
#only compute gh points if required
__gh_points = None
def _gh_points(self):
def _gh_points(self, T=20):
if self.__gh_points is None:
self.__gh_points = np.polynomial.hermite.hermgauss(20)
self.__gh_points = np.polynomial.hermite.hermgauss(T)
return self.__gh_points
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
@ -211,9 +254,11 @@ class Likelihood(Parameterized):
#d2logp_dx2 = np.clip(d2logp_dx2,-1e9,1e9)
#average over the grid to get derivatives of the Gaussian's parameters
F = np.dot(logp, gh_w)
dF_dm = np.dot(dlogp_dx, gh_w)
dF_dv = np.dot(d2logp_dx2, gh_w)/2.
#division by sqrt(pi) comes from the fact that each Gauss-Hermite expectation needs to be scaled by 1/sqrt(pi)
F = np.dot(logp, gh_w)/np.sqrt(np.pi)
dF_dm = np.dot(dlogp_dx, gh_w)/np.sqrt(np.pi)
dF_dv = np.dot(d2logp_dx2, gh_w)/np.sqrt(np.pi)
dF_dv /= 2.
if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
stop
@ -221,8 +266,8 @@ class Likelihood(Parameterized):
stop
if self.size:
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
dF_dtheta = np.dot(dF_dtheta, gh_w)
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None], Y_metadata=Y_metadata) # Ntheta x (orig size) x N_{quad_points}
dF_dtheta = np.dot(dF_dtheta, gh_w)/np.sqrt(np.pi)
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
else:
dF_dtheta = None # Not yet implemented
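A minimal, self-contained check of where the 1/sqrt(pi) factor comes from when Gauss-Hermite points are used to take Gaussian expectations (plain NumPy, not GPy code):

import numpy as np

# E_{N(f|m,v)}[f**2] = m**2 + v; with the substitution f = sqrt(2 v) x + m the
# Gaussian integral becomes (1/sqrt(pi)) * sum_i w_i * g(sqrt(2 v) x_i + m)
m, v = 0.3, 1.7
gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
f = np.sqrt(2.0 * v) * gh_x + m
approx = np.dot(f ** 2, gh_w) / np.sqrt(np.pi)
assert np.allclose(approx, m ** 2 + v)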
@ -253,13 +298,8 @@ class Likelihood(Parameterized):
return self.conditional_mean(f)*p
scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
return mean
def _conditional_mean(self, f):
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
raise NotImplementedError("implement this function to make predictions")
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
"""
Approximation to the predictive variance: V(Y_star)
@ -563,23 +603,30 @@ class Likelihood(Parameterized):
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
"""
pred_mean = self.predictive_mean(mu, var, Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
try:
pred_mean = self.predictive_mean(mu, var, Y_metadata=Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata=Y_metadata)
except NotImplementedError:
print "Finding predictive mean and variance via sampling rather than quadrature"
Nf_samp = 300
Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
pred_mean = np.mean(ss_y, axis=1)[:, None]
pred_var = np.var(ss_y, axis=1)[:, None]
return pred_mean, pred_var
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
#compute the quantiles by sampling!!!
N_samp = 500
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
#ss_y = self.samples(s, Y_metadata, samples=100)
ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
Nf_samp = 300
Ny_samp = 1
s = np.random.randn(mu.shape[0], Nf_samp)*np.sqrt(var) + mu
ss_y = self.samples(s, Y_metadata, samples=Ny_samp)
#ss_y = ss_y.reshape(mu.shape[0], mu.shape[1], Nf_samp*Ny_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
pred_quantiles = [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
return pred_quantiles
def samples(self, gp, Y_metadata=None, samples=1):
"""

View file

@ -10,6 +10,7 @@ from scipy.special import gammaln, gamma
from .likelihood import Likelihood
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from scipy.special import psi as digamma
class StudentT(Likelihood):
"""
@ -28,16 +29,13 @@ class StudentT(Likelihood):
super(StudentT, self).__init__(gp_link, name='Student_T')
# sigma2 is not a noise parameter, it is a squared scale.
self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
self.v = Param('deg_free', float(deg_free))
self.v = Param('deg_free', float(deg_free), Logexp())
self.link_parameter(self.sigma2)
self.link_parameter(self.v)
self.v.constrain_fixed()
#self.v.constrain_fixed()
self.log_concave = False
#def parameters_changed(self):
#self.variance = (self.v / float(self.v - 2)) * self.sigma2
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
@ -224,20 +222,46 @@ class StudentT(Likelihood):
)
return d2logpdf_dlink2_dvar
def dlogpdf_link_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
dlogpdf_dv = 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
dlogpdf_dv += 0.5*(df+1)*e2/(df*(e2 + s2*df))
dlogpdf_dv -= 0.5*np.log1p(e2/(s2*df))
return dlogpdf_dv
def dlogpdf_dlink_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
dlogpdf_df_dv = e*(e2 - self.sigma2)/(e2 + s2*df)**2
return dlogpdf_df_dv
def d2logpdf_dlink2_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
e2_s2v = e**2 + s2*df
d2logpdf_df2_dv = (-s2*(df+1) + e2 - s2*df)/e2_s2v**2 - 2*s2*(df+1)*(e2 - s2*df)/e2_s2v**3
return d2logpdf_df2_dv
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
dlogpdf_dv = self.dlogpdf_link_dv(f, y, Y_metadata=Y_metadata)
return np.array((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
dlogpdf_dlink_dv = self.dlogpdf_dlink_dv(f, y, Y_metadata=Y_metadata)
return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
d2logpdf_dlink2_dv = self.d2logpdf_dlink2_dv(f, y, Y_metadata=Y_metadata)
return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None):

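The new degrees-of-freedom gradient can be sanity-checked against a central finite difference of the Student-t log density; this is a standalone sketch (not GPy code) using the same parameterisation (degrees of freedom v, squared scale s2):

import numpy as np
from scipy.special import gammaln, psi as digamma

def studentt_logpdf(e2, v, s2):
    # log p(y|f) for a Student-t with df v and squared scale s2, where e2 = (y - f)**2
    return (gammaln(0.5*(v + 1)) - gammaln(0.5*v)
            - 0.5*np.log(s2*v*np.pi) - 0.5*(v + 1)*np.log1p(e2/(s2*v)))

def dlogpdf_dv(e2, v, s2):
    # analytic derivative w.r.t. the degrees of freedom, as in dlogpdf_link_dv above
    return (0.5*digamma(0.5*(v + 1)) - 0.5*digamma(0.5*v) - 1.0/(2*v)
            + 0.5*(v + 1)*e2/(v*(e2 + s2*v)) - 0.5*np.log1p(e2/(s2*v)))

e2, v, s2, h = 0.7, 4.0, 0.2, 1e-6
numeric = (studentt_logpdf(e2, v + h, s2) - studentt_logpdf(e2, v - h, s2)) / (2*h)
assert np.allclose(numeric, dlogpdf_dv(e2, v, s2), rtol=1e-5)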
View file

@ -6,4 +6,5 @@ from .linear import Linear
from .mlp import MLP
from .additive import Additive
from .compound import Compound
from .constant import Constant

40
GPy/mappings/constant.py Normal file
View file

@ -0,0 +1,40 @@
# Copyright (c) 2015, James Hensman, Alan Saul
import numpy as np
from ..core.mapping import Mapping
from ..core.parameterization import Param
class Constant(Mapping):
"""
A constant mapping.
.. math::
F(\mathbf{x}) = c
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
:param value: the value of this constant mapping
"""
def __init__(self, input_dim, output_dim, value=0., name='constmap'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
value = np.atleast_1d(value)
if not len(value.shape) ==1:
raise ValueError("bad constant values: pass a float or flat vector")
elif value.size==1:
value = np.ones(self.output_dim)*value
self.C = Param('C', value)
self.link_parameter(self.C)
def f(self, X):
return np.tile(self.C.values[None,:], (X.shape[0], 1))
def update_gradients(self, dL_dF, X):
self.C.gradient = dL_dF.sum(0)
def gradients_X(self, dL_dF, X):
return np.zeros_like(X)

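A small usage sketch of the new mapping (illustrative only, assuming the class is exposed as GPy.mappings.Constant as in the __init__ change above):

import numpy as np
import GPy

# constant mean of 2.5 for a 3-dimensional input, 1-dimensional output
mapping = GPy.mappings.Constant(input_dim=3, output_dim=1, value=2.5)
X = np.random.randn(10, 3)
F = mapping.f(X)                          # 10 x 1 array, every entry 2.5
mapping.update_gradients(np.ones((10, 1)), X)
print(mapping.C.gradient)                 # dL_dF summed over rows -> [ 10.]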
View file

@ -64,9 +64,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
kernel.psicomp.GPU_direct = True
super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method,
normalizer=normalizer,

View file

@ -3,7 +3,7 @@
import numpy as np
from .. import kern
from bayesian_gplvm import BayesianGPLVM
from .bayesian_gplvm import BayesianGPLVM
from ..core.parameterization.variational import NormalPosterior, NormalPrior
class DPBayesianGPLVM(BayesianGPLVM):
@ -15,5 +15,5 @@ class DPBayesianGPLVM(BayesianGPLVM):
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm')
self.X.mean.set_prior(X_prior)
self.X.mean.set_prior(X_prior)
self.link_parameter(X_prior)

View file

@ -16,6 +16,7 @@ class GPRegression(GP):
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf
:param Norm normalizer: [False]
:param noise_var: the noise variance for Gaussian likelihood, defaults to 1.
Normalize Y with the norm given.
If normalizer is False, no normalization will be done
@ -25,12 +26,12 @@ class GPRegression(GP):
"""
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.):
if kernel is None:
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian()
likelihood = likelihoods.Gaussian(variance=noise_var)
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)

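Usage of the added noise_var argument, e.g. to start the optimiser from a small noise level (the data and values here are arbitrary):

import numpy as np
import GPy

X = np.random.rand(20, 1)
Y = np.sin(6*X) + 0.05*np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y, noise_var=0.01)   # was always 1. before this change
print(m.likelihood.variance)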
View file

@ -9,13 +9,14 @@ from ..core.parameterization import ObsAr
from .. import kern
from ..core.parameterization.param import Param
from ..util.linalg import pdinv
from ..likelihoods import Gaussian
log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model):
"""
The Variational Gaussian Approximation revisited implementation for regression
The Variational Gaussian Approximation revisited
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
@ -25,44 +26,29 @@ class GPVariationalGaussianApproximation(Model):
pages = {786--792},
}
"""
def __init__(self, X, Y, kernel=None):
Model.__init__(self,'Variational GP classification')
def __init__(self, X, Y, kernel, likelihood=None, Y_metadata=None):
Model.__init__(self,'Variational GP')
if likelihood is None:
likelihood = Gaussian()
# accept the construction arguments
self.X = ObsAr(X)
if kernel is None:
kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
self.kern = kernel
self.link_parameter(self.kern)
self.Y = Y
self.num_data, self.input_dim = self.X.shape
self.Y_metadata = Y_metadata
self.alpha = Param('alpha', np.zeros(self.num_data))
self.kern = kernel
self.likelihood = likelihood
self.link_parameter(self.kern)
self.link_parameter(self.likelihood)
self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(self.num_data))
self.link_parameter(self.alpha)
self.link_parameter(self.beta)
self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
self.Ysign = np.where(Y==1, 1, -1).flatten()
def log_likelihood(self):
"""
Marginal log likelihood evaluation
"""
return self._log_lik
def likelihood_quadrature(self, m, v):
"""
Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
"""
# assume probit for now.
X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
p = stats.norm.cdf(X)
N = stats.norm.pdf(X)
F = np.log(p).dot(self.gh_w)
NoverP = N/p
dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w)
dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
return F, dF_dm, dF_dv
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
@ -71,13 +57,14 @@ class GPVariationalGaussianApproximation(Model):
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior covariance: need full matrix for gradients
var = np.diag(Sigma)
var = np.diag(Sigma).reshape(-1,1)
F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
@ -86,12 +73,12 @@ class GPVariationalGaussianApproximation(Model):
self.beta.gradient = dF_db - dKL_db
# K-gradients
dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
def predict(self, Xnew):
def _raw_predict(self, Xnew):
"""
Predict the function(s) at the new point(s) Xnew.
@ -105,4 +92,4 @@ class GPVariationalGaussianApproximation(Model):
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))
return mu, var.reshape(-1,1)

View file

@ -228,14 +228,14 @@ class HessianChecker(GradientChecker):
if verbose:
if block_indices:
print "\nBlock {}".format(block_indices)
print("\nBlock {}".format(block_indices))
else:
print "\nAll blocks"
print("\nAll blocks")
header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
header_string = map(lambda x: ' | '.join(header), [header])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
print('\n'.join([header_string[0], separator]))
min_r = '%.6f' % float(numpy.min(ratio))
max_r = '%.6f' % float(numpy.max(ratio))
max_d = '%.6f' % float(numpy.max(difference))
@ -248,7 +248,7 @@ class HessianChecker(GradientChecker):
checked = "\033[91m False \033[0m"
grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
print grad_string
print(grad_string)
if plot:
import pylab as pb
@ -348,7 +348,7 @@ class SkewChecker(HessianChecker):
numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
numeric_hess = numeric_hess_partial(x)
print "Done making numerical hessian"
print("Done making numerical hessian")
if analytic_hess.dtype is np.dtype('object'):
#Blockify numeric_hess as well
blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
@ -365,7 +365,7 @@ class SkewChecker(HessianChecker):
#Unless super_plot is set, just plot the first one
p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
if verbose:
print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
print("Checking derivative of hessian wrt parameter number {}".format(block_ind))
check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
current_index += current_size

View file

@ -5,11 +5,95 @@ import numpy as np
from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import kern
from ..core.parameterization import Param
from ..likelihoods import Gaussian
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior,VariationalPrior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU
class IBPPosterior(SpikeAndSlabPosterior):
'''
The SpikeAndSlab distribution for variational approximations.
'''
def __init__(self, means, variances, binary_prob, tau=None, sharedX=False, name='latent space'):
"""
binary_prob : the probability of the distribution on the slab part.
"""
from ..core.parameterization.transformations import Logexp
super(IBPPosterior, self).__init__(means, variances, binary_prob, group_spike=True, name=name)
self.sharedX = sharedX
if sharedX:
self.mean.fix(warning=False)
self.variance.fix(warning=False)
self.tau = Param("tau_", np.ones((self.gamma_group.shape[0],2)), Logexp())
self.link_parameter(self.tau)
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient, self.tau.gradient = grad
def __getitem__(self, s):
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
import copy
n = self.__new__(self.__class__, self.name)
dc = self.__dict__.copy()
dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s]
dc['binary_prob'] = self.binary_prob[s]
dc['tau'] = self.tau
dc['parameters'] = copy.copy(self.parameters)
n.__dict__.update(dc)
n.parameters[dc['mean']._parent_index_] = dc['mean']
n.parameters[dc['variance']._parent_index_] = dc['variance']
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n.parameters[dc['tau']._parent_index_] = dc['tau']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size - self.tau.size
n.size = n.mean.size + n.variance.size + n.gamma.size+ n.tau.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(IBPPosterior, self).__getitem__(s)
class IBPPrior(VariationalPrior):
def __init__(self, input_dim, alpha =2., name='IBPPrior', **kw):
super(IBPPrior, self).__init__(name=name, **kw)
from ..core.parameterization.transformations import Logexp, __fixed__
self.input_dim = input_dim
self.variance = 1.
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def KL_divergence(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
ad = self.alpha/self.input_dim
from scipy.special import betaln,digamma
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(ad,1.)*self.input_dim \
-betaln(tau[:,0], tau[:,1]).sum() + ((tau[:,0]-gamma-ad)*digamma(tau[:,0])).sum() + \
((tau[:,1]+gamma-2.)*digamma(tau[:,1])).sum() + ((2.+ad-tau[:,0]-tau[:,1])*digamma(tau.sum(axis=1))).sum()
return part1+part2
def update_gradients_KL(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
variational_posterior.mean.gradient -= gamma*mu/self.variance
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2.
from scipy.special import digamma,polygamma
dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
ad = self.alpha/self.input_dim
common = (ad+2-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1))
variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common)
variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common)
class SSGPLVM(SparseGP_MPI):
"""
Spike-and-Slab Gaussian Process Latent Variable Model
@ -23,9 +107,11 @@ class SSGPLVM(SparseGP_MPI):
"""
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, **kwargs):
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
self.group_spike = group_spike
self.init = init
self.sharedX = sharedX
if X == None:
from ..util.initialization import initialize_latent
@ -33,8 +119,6 @@ class SSGPLVM(SparseGP_MPI):
else:
fracs = np.ones(input_dim)
self.init = init
if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape)
@ -64,17 +148,16 @@ class SSGPLVM(SparseGP_MPI):
if pi is None:
pi = np.empty((input_dim))
pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi) # the prior probability of the latent binary variable b
X = SpikeAndSlabPosterior(X, X_variance, gamma)
if IBP:
self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior
X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX)
else:
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior
X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX)
super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
# self.X.unfix()
# self.X.variance.constrain_positive()
self.link_parameter(self.X, index=0)
if self.group_spike:
[self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in range(self.X.gamma.shape[1])] # Tie columns together
def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form."""
@ -84,9 +167,15 @@ class SSGPLVM(SparseGP_MPI):
"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient
def _propogate_X_val(self):
pass
def parameters_changed(self):
self.X.propogate_val()
if self.sharedX: self._highest_parent_._propogate_X_val()
super(SSGPLVM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch):
self.X.collate_gradient()
return
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@ -95,6 +184,7 @@ class SSGPLVM(SparseGP_MPI):
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
self.X.collate_gradient()
def input_sensitivity(self):
if self.kern.ARD:

View file

@ -2,33 +2,256 @@
The Manifold Relevance Determination model with the spike-and-slab prior
"""
import numpy as np
from ..core import Model
from .ss_gplvm import SSGPLVM
from ..core.parameterization.variational import SpikeAndSlabPrior,NormalPosterior,VariationalPrior
from ..util.misc import param_to_array
from ..kern import RBF
from ..core import Param
from numpy.linalg.linalg import LinAlgError
class SSMRD(Model):
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
initx = 'PCA', initz = 'permute',
num_inducing=10, Z=None, kernel=None,
inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute',
num_inducing=10, Zs=None, kernels=None, inference_methods=None, likelihoods=None, group_spike=True,
pi=0.5, name='ss_mrd', Ynames=None, mpi_comm=None, IBP=False, alpha=2., taus=None, ):
super(SSMRD, self).__init__(name)
self.mpi_comm = mpi_comm
self._PROPAGATE_ = False
self.updates = False
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx,
kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods,
name='model_'+str(i)) for i,y in enumerate(Ylist)]
self.add_parameters(*(self.models))
# initialize X for individual models
X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
self.X = NormalPosterior(means=X, variances=X_variance)
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])]
for i in range(self.models[0].X.mean.shape[0])]
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])]
for i in range(self.models[0].X.variance.shape[0])]
if kernels is None:
kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in xrange(len(Ylist))]
if Zs is None:
Zs = [None]* len(Ylist)
if likelihoods is None:
likelihoods = [None]* len(Ylist)
if inference_methods is None:
inference_methods = [None]* len(Ylist)
self.updates = True
if IBP:
self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in xrange(len(Ylist))]
else:
self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in xrange(len(Ylist))]
self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike,
kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i],
name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)]
self.link_parameters(*(self.models+[self.X]))
def _propogate_X_val(self):
if self._PROPAGATE_: return
for m in self.models:
m.X.mean.values[:] = self.X.mean.values
m.X.variance.values[:] = self.X.variance.values
varp_list = [m.X for m in self.models]
[vp._update_inernal(varp_list) for vp in self.var_priors]
self._PROPAGATE_=True
def _collate_X_gradient(self):
self._PROPAGATE_ = False
self.X.mean.gradient[:] = 0
self.X.variance.gradient[:] = 0
for m in self.models:
self.X.mean.gradient += m.X.mean.gradient
self.X.variance.gradient += m.X.variance.gradient
def parameters_changed(self):
super(SSMRD, self).parameters_changed()
super(SSMRD, self).parameters_changed()
[m.parameters_changed() for m in self.models]
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
self._collate_X_gradient()
def log_likelihood(self):
return self._log_marginal_likelihood
def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
# Divide latent dimensions
idx = np.empty((input_dim,),dtype=np.int)
residue = (input_dim)%(len(Ylist))
for i in xrange(len(Ylist)):
if i < residue:
size = input_dim/len(Ylist)+1
idx[i*size:(i+1)*size] = i
else:
size = input_dim/len(Ylist)
idx[i*size+residue:(i+1)*size+residue] = i
if X is None:
if initx == 'PCA_concat':
X = np.empty((Ylist[0].shape[0],input_dim))
fracs = np.empty((input_dim,))
from ..util.initialization import initialize_latent
for i in xrange(len(Ylist)):
Y = Ylist[i]
dim = (idx==i).sum()
if dim>0:
x, fr = initialize_latent('PCA', dim, Y)
X[:,idx==i] = x
fracs[idx==i] = fr
elif initx=='PCA_joint':
y = np.hstack(Ylist)
from ..util.initialization import initialize_latent
X, fracs = initialize_latent('PCA', input_dim, y)
else:
X = np.random.randn(Ylist[0].shape[0], input_dim)
fracs = np.ones(input_dim)
else:
fracs = np.ones(input_dim)
if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape)
if Gammas is None:
Gammas = []
for x in X:
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
Gammas.append(gamma)
return X, X_variance, Gammas, fracs
@Model.optimizer_array.setter
def optimizer_array(self, p):
if self.mpi_comm != None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)
Model.optimizer_array.fset(self,p)
def optimize(self, optimizer=None, start=None, **kwargs):
self._IN_OPTIMIZATION_ = True
if self.mpi_comm==None:
super(SSMRD, self).optimize(optimizer,start,**kwargs)
elif self.mpi_comm.rank==0:
super(SSMRD, self).optimize(optimizer,start,**kwargs)
self.mpi_comm.Bcast(np.int32(-1),root=0)
elif self.mpi_comm.rank>0:
x = self.optimizer_array.copy()
flag = np.empty(1,dtype=np.int32)
while True:
self.mpi_comm.Bcast(flag,root=0)
if flag==1:
try:
self.optimizer_array = x
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures:
raise
self._fail_count += 1
elif flag==-1:
break
else:
self._IN_OPTIMIZATION_ = False
raise Exception("Unrecognizable flag for synchronization!")
self._IN_OPTIMIZATION_ = False
class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
def __init__(self, nModels, pi=0.5, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
self.nModels = nModels
self._b_prob_all = 0.5
super(SpikeAndSlabPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
def _update_inernal(self, varp_list):
"""Make an update of the internal status by gathering the variational posteriors for all the individual models."""
# _b_prob_all is the probability that the binary variable for each latent dimension is off in all of the models; 1 - _b_prob_all is the probability that it is on in at least one model.
if self.group_spike:
self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
[np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
else:
self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
[np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
if self.group_spike:
gamma = variational_posterior.binary_prob[0]
else:
gamma = variational_posterior.binary_prob
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
N = variational_posterior.num_data
if self.group_spike:
gamma = variational_posterior.binary_prob.values[0]
else:
gamma = variational_posterior.binary_prob.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
if self.group_spike:
tmp = self._b_prob_all/(1.-gamma)
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
else:
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
if self.learnPi:
raise NotImplementedError('Not Supported!')
class IBPPrior_SSMRD(VariationalPrior):
def __init__(self, nModels, input_dim, alpha =2., tau=None, name='IBPPrior', **kw):
super(IBPPrior_SSMRD, self).__init__(name=name, **kw)
from ..core.parameterization.transformations import Logexp, __fixed__
self.nModels = nModels
self._b_prob_all = 0.5
self.input_dim = input_dim
self.variance = 1.
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def _update_inernal(self, varp_list):
"""Make an update of the internal status by gathering the variational posteriors for all the individual models."""
# _b_prob_all is the probability that the binary variable for each latent dimension is off in all of the models; 1 - _b_prob_all is the probability that it is on in at least one model.
self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
[np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
def KL_divergence(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
part1 = ((1.-self._b_prob_all)* (np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
ad = self.alpha/self.input_dim
from scipy.special import betaln,digamma
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + (betaln(ad,1.)*self.input_dim -betaln(tau[:,0], tau[:,1]).sum())/self.nModels \
+ (( (tau[:,0]-ad)/self.nModels -gamma)*digamma(tau[:,0])).sum() + \
(((tau[:,1]-1.)/self.nModels+gamma-1.)*digamma(tau[:,1])).sum() + (((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*digamma(tau.sum(axis=1))).sum()
return part1+part2
def update_gradients_KL(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
variational_posterior.mean.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
from scipy.special import digamma,polygamma
tmp = self._b_prob_all/(1.-gamma)
dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
variational_posterior.binary_prob.gradient -= dgamma+tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
ad = self.alpha/self.input_dim
common = ((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*polygamma(1,tau.sum(axis=1))
variational_posterior.tau.gradient[:,0] = -(((tau[:,0]-ad)/self.nModels -gamma)*polygamma(1,tau[:,0])+common)
variational_posterior.tau.gradient[:,1] = -(((tau[:,1]-1.)/self.nModels+gamma-1.)*polygamma(1,tau[:,1])+common)

View file

@ -11,13 +11,14 @@ from base_plots import gpplot, x_frame1D, x_frame2D
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse
from ...core.parameterization.variational import VariationalPosterior
def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
apply_link=False, samples_f=0, plot_uncertain_inputs=True):
apply_link=False, samples_f=0, plot_uncertain_inputs=True, predict_kw=None):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -76,6 +77,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if hasattr(model, 'Z'): Z = model.Z
if predict_kw is None:
predict_kw = {}
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
@ -92,7 +96,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#make a prediction on the frame and plot it
if plot_raw:
m, v = model._raw_predict(Xgrid)
m, v = model._raw_predict(Xgrid, **predict_kw)
if apply_link:
lower = model.likelihood.gp_link.transf(m - 2*np.sqrt(v))
upper = model.likelihood.gp_link.transf(m + 2*np.sqrt(v))
@ -103,11 +107,13 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
upper = m + 2*np.sqrt(v)
else:
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=meta)
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=Y_metadata)
for d in which_data_ycols:
@ -116,7 +122,9 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples)
Ysim = model.posterior_samples(Xgrid, samples, Y_metadata=Y_metadata)
print Ysim.shape
print Xnew.shape
for yi in Ysim.T:
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
@ -178,13 +186,15 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#predict on the frame and plot
if plot_raw:
m, _ = model._raw_predict(Xgrid)
m, _ = model._raw_predict(Xgrid, **predict_kw)
else:
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta)
extra_data = Xgrid[:,-1:].astype(np.int)
if Y_metadata is None:
Y_metadata = {'output_index': extra_data}
else:
Y_metadata['output_index'] = extra_data
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=Y_metadata, **predict_kw)
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
@ -216,7 +226,7 @@ def plot_fit_f(model, *args, **kwargs):
kwargs['plot_raw'] = True
plot_fit(model,*args, **kwargs)
def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_all=False):
"""
Convenience function for building fixed_inputs, where the inputs not listed in
non_fixed_inputs are fixed using fix_routine
@ -226,18 +236,30 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
:type non_fixed_inputs: list
:param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
:type fix_routine: string
:param as_list: if true, return a list of (dimension, fixed_val) tuples; otherwise return the corresponding X matrix with those columns fixed
:type as_list: boolean
"""
f_inputs = []
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean.values.copy()
else:
elif isinstance(model.X, VariationalPosterior):
X = model.X.values.copy()
else:
if X_all:
X = model.X_all.copy()
else:
X = model.X.copy()
for i in range(X.shape[1]):
if i not in non_fixed_inputs:
if fix_routine == 'mean':
f_inputs.append( (i, np.mean(X[:,i])) )
if fix_routine == 'median':
f_inputs.append( (i, np.median(X[:,i])) )
elif fix_routine == 'zero':
else: # set to zero
f_inputs.append( (i, 0) )
return f_inputs
if not as_list:
X[:,i] = f_inputs[-1][1]
if as_list:
return f_inputs
else:
return X

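A sketch of the extended fixed_inputs signature (the import path is assumed to be this plotting module and may differ; the model construction is only there to make the snippet self-contained):

import numpy as np
import GPy
# fixed_inputs is the function defined above; the exact import path is an assumption
from GPy.plotting.matplot_dep.models_plots import fixed_inputs

X = np.random.rand(30, 3)
Y = np.sin(X[:, :1]) + 0.1*np.random.randn(30, 1)
model = GPy.models.GPRegression(X, Y)

pairs = fixed_inputs(model, non_fixed_inputs=[0], fix_routine='median')                 # [(1, v1), (2, v2)]
Xfix = fixed_inputs(model, non_fixed_inputs=[0], fix_routine='median', as_list=False)   # full 30 x 3 matrix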
View file

@ -408,12 +408,13 @@ class mocap_data_show_vpython(vpython_show):
class mocap_data_show(matplotlib_show):
"""Base class for visualizing motion capture data."""
def __init__(self, vals, axes=None, connect=None):
def __init__(self, vals, axes=None, connect=None, color='b'):
if axes==None:
fig = plt.figure()
axes = fig.add_subplot(111, projection='3d', aspect='equal')
matplotlib_show.__init__(self, vals, axes)
self.color = color
self.connect = connect
self.process_values()
self.initialize_axes()
@ -423,7 +424,7 @@ class mocap_data_show(matplotlib_show):
self.axes.figure.canvas.draw()
def draw_vertices(self):
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2])
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2], color=self.color)
def draw_edges(self):
self.line_handle = []
@ -442,7 +443,7 @@ class mocap_data_show(matplotlib_show):
z.append(self.vals[i, 2])
z.append(self.vals[j, 2])
z.append(np.NaN)
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-')
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), '-', color=self.color)
def modify(self, vals):
self.vals = vals.copy()
@ -450,7 +451,7 @@ class mocap_data_show(matplotlib_show):
self.initialize_axes_modify()
self.draw_vertices()
self.initialize_axes()
self.finalize_axes_modify()
#self.finalize_axes_modify()
self.draw_edges()
self.axes.figure.canvas.draw()
@ -469,12 +470,20 @@ class mocap_data_show(matplotlib_show):
self.line_handle[0].remove()
def finalize_axes(self):
self.axes.set_xlim(self.x_lim)
self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim)
self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
# self.axes.set_xlim(self.x_lim)
# self.axes.set_ylim(self.y_lim)
# self.axes.set_zlim(self.z_lim)
# self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
# self.axes.set_aspect('equal')
extents = np.array([getattr(self.axes, 'get_{}lim'.format(dim))() for dim in 'xyz'])
sz = extents[:,1] - extents[:,0]
centers = np.mean(extents, axis=1)
maxsize = max(abs(sz))
r = maxsize/2
for ctr, dim in zip(centers, 'xyz'):
getattr(self.axes, 'set_{}lim'.format(dim))(ctr - r, ctr + r)
# self.axes.set_aspect('equal')
# self.axes.autoscale(enable=False)
def finalize_axes_modify(self):
@ -494,7 +503,7 @@ class stick_show(mocap_data_show):
class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles."""
def __init__(self, vals, skel, axes=None, padding=0):
def __init__(self, vals, skel, axes=None, padding=0, color='b'):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles.
:param vals: set of modeled angles to use for printing in the axis when it's first created.
:type vals: np.array
@ -506,7 +515,7 @@ class skeleton_show(mocap_data_show):
self.skel = skel
self.padding = padding
connect = skel.connection_matrix()
mocap_data_show.__init__(self, vals, axes=axes, connect=connect)
mocap_data_show.__init__(self, vals, axes=axes, connect=connect, color=color)
def process_values(self):
"""Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting.

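The finalize_axes change above works around matplotlib's unreliable set_aspect('equal') for 3D axes by giving all three axes the same half-width; a standalone sketch of the trick:

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # registers the '3d' projection

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(np.random.randn(50), 2*np.random.randn(50), 0.1*np.random.randn(50))
extents = np.array([getattr(ax, 'get_{}lim'.format(dim))() for dim in 'xyz'])
centers = extents.mean(axis=1)
r = np.abs(extents[:, 1] - extents[:, 0]).max() / 2.
for ctr, dim in zip(centers, 'xyz'):
    getattr(ax, 'set_{}lim'.format(dim))(ctr - r, ctr + r)   # cube-shaped plot box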
View file

@ -0,0 +1,65 @@
import numpy as np
import scipy as sp
from GPy.util import choleskies
import GPy
"""
These tests make sure that the pure python and cython codes work the same
"""
class CythonTestChols(np.testing.TestCase):
def setUp(self):
self.flat = np.random.randn(45,5)
self.triang = np.array([np.eye(20) for i in range(3)])
def test_flat_to_triang(self):
L1 = choleskies._flat_to_triang_pure(self.flat)
L2 = choleskies._flat_to_triang_cython(self.flat)
np.testing.assert_allclose(L1, L2)
def test_triang_to_flat(self):
A1 = choleskies._triang_to_flat_pure(self.triang)
A2 = choleskies._triang_to_flat_cython(self.triang)
np.testing.assert_allclose(A1, A2)
class test_stationary(np.testing.TestCase):
def setUp(self):
self.k = GPy.kern.RBF(10)
self.X = np.random.randn(300,10)
self.Z = np.random.randn(20,10)
self.dKxx = np.random.randn(300,300)
self.dKzz = np.random.randn(20,20)
self.dKxz = np.random.randn(300,20)
def test_square_gradX(self):
g1 = self.k._gradients_X_cython(self.dKxx, self.X)
g2 = self.k._gradients_X_pure(self.dKxx, self.X)
np.testing.assert_allclose(g1, g2)
def test_rect_gradx(self):
g1 = self.k._gradients_X_cython(self.dKxz, self.X, self.Z)
g2 = self.k._gradients_X_pure(self.dKxz, self.X, self.Z)
np.testing.assert_allclose(g1, g2)
def test_square_lengthscales(self):
g1 = self.k._lengthscale_grads_pure(self.dKxx, self.X, self.X)
g2 = self.k._lengthscale_grads_cython(self.dKxx, self.X, self.X)
np.testing.assert_allclose(g1, g2)
def test_rect_lengthscales(self):
g1 = self.k._lengthscale_grads_pure(self.dKxz, self.X, self.Z)
g2 = self.k._lengthscale_grads_cython(self.dKxz, self.X, self.Z)
np.testing.assert_allclose(g1, g2)
class test_choleskies_backprop(np.testing.TestCase):
def setUp(self):
self.dL, self.L = np.random.randn(2, 100, 100)
def test(self):
r1 = GPy.util.choleskies._backprop_gradient_pure(self.dL, self.L)
r2 = GPy.util.choleskies.choleskies_cython.backprop_gradient(self.dL, self.L)
np.testing.assert_allclose(r1, r2)

View file

@ -366,9 +366,9 @@ class KernelTestsNonContinuous(unittest.TestCase):
X2 = self.X2[self.X2[:,-1]!=2]
self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
class Coregionalize_weave_test(unittest.TestCase):
class Coregionalize_cython_test(unittest.TestCase):
"""
Make sure that the coregionalize kernel work with and without weave enabled
Make sure that the coregionalize kernel works with and without cython enabled
"""
def setUp(self):
self.k = GPy.kern.Coregionalize(1, output_dim=12)
@ -378,36 +378,42 @@ class Coregionalize_weave_test(unittest.TestCase):
def test_sym(self):
dL_dK = np.random.randn(self.N1, self.N1)
GPy.util.config.config.set('weave', 'working', 'True')
K_weave = self.k.K(self.X)
GPy.util.config.config.set('cython', 'working', 'True')
K_cython = self.k.K(self.X)
self.k.update_gradients_full(dL_dK, self.X)
grads_weave = self.k.gradient.copy()
grads_cython = self.k.gradient.copy()
GPy.util.config.config.set('weave', 'working', 'False')
GPy.util.config.config.set('cython', 'working', 'False')
K_numpy = self.k.K(self.X)
self.k.update_gradients_full(dL_dK, self.X)
grads_numpy = self.k.gradient.copy()
self.assertTrue(np.allclose(K_numpy, K_weave))
self.assertTrue(np.allclose(grads_numpy, grads_weave))
self.assertTrue(np.allclose(K_numpy, K_cython))
self.assertTrue(np.allclose(grads_numpy, grads_cython))
#reset the cython state for any other tests
GPy.util.config.config.set('cython', 'working', 'true')
def test_nonsym(self):
dL_dK = np.random.randn(self.N1, self.N2)
GPy.util.config.config.set('weave', 'working', 'True')
K_weave = self.k.K(self.X, self.X2)
GPy.util.config.config.set('cython', 'working', 'True')
K_cython = self.k.K(self.X, self.X2)
self.k.gradient = 0.
self.k.update_gradients_full(dL_dK, self.X, self.X2)
grads_weave = self.k.gradient.copy()
grads_cython = self.k.gradient.copy()
GPy.util.config.config.set('weave', 'working', 'False')
GPy.util.config.config.set('cython', 'working', 'False')
K_numpy = self.k.K(self.X, self.X2)
self.k.gradient = 0.
self.k.update_gradients_full(dL_dK, self.X, self.X2)
grads_numpy = self.k.gradient.copy()
self.assertTrue(np.allclose(K_numpy, K_weave))
self.assertTrue(np.allclose(grads_numpy, grads_weave))
self.assertTrue(np.allclose(K_numpy, K_cython))
self.assertTrue(np.allclose(grads_numpy, grads_cython))
#reset the cython state for any other tests
GPy.util.config.config.set('cython', 'working', 'true')
#reset the weave state for any other tests
GPy.util.config.config.set('weave', 'working', 'False')
class KernelTestsProductWithZeroValues(unittest.TestCase):

View file

@ -93,6 +93,9 @@ def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None,
if not grad.checkgrad(verbose=True):
gradchecking = False
if not grad.checkgrad(verbose=True):
gradchecking = False
return gradchecking
@ -116,6 +119,7 @@ class TestNoiseModels(object):
self.integer_Y = np.where(tmp > 0, tmp, 0)
self.var = 0.2
self.deg_free = 4.0
#Make a bigger step as lower bound can be quite curved
self.step = 1e-4
@ -135,7 +139,7 @@ class TestNoiseModels(object):
}
"""
self.noise_models = {"Student_t_default": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [self.var],
@ -143,8 +147,17 @@ class TestNoiseModels(object):
},
"laplace": True
},
#"Student_t_deg_free": {
#"model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
#"grad_params": {
#"names": [".*deg_free"],
#"vals": [self.deg_free],
#"constraints": [(".*t_scale2", self.constrain_fixed), (".*deg_free", self.constrain_positive)]
#},
#"laplace": True
#},
"Student_t_1_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [1.0],
@ -162,7 +175,7 @@ class TestNoiseModels(object):
"laplace": True
},
"Student_t_small_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [0.001],
@ -171,7 +184,7 @@ class TestNoiseModels(object):
"laplace": True
},
"Student_t_large_var": {
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.var),
"grad_params": {
"names": [".*t_scale2"],
"vals": [10.0],

View file

@ -1,6 +1,7 @@
import numpy as np
import scipy as sp
from ..util.linalg import jitchol
from GPy.util.linalg import jitchol
import GPy
class LinalgTests(np.testing.TestCase):
def setUp(self):
@ -35,3 +36,25 @@ class LinalgTests(np.testing.TestCase):
return False
except sp.linalg.LinAlgError:
return True
def test_einsum_ijk_jlk_to_il(self):
A = np.random.randn(50, 150, 5)
B = np.random.randn(150, 100, 5)
pure = np.einsum('ijk,jlk->il', A, B)
quick = GPy.util.linalg.ijk_jlk_to_il(A, B)
np.testing.assert_allclose(pure, quick)
def test_einsum_ij_jlk_to_ilk(self):
A = np.random.randn(15, 150, 5)
B = np.random.randn(150, 50, 5)
pure = np.einsum('ijk,jlk->il', A, B)
quick = GPy.util.linalg.ijk_jlk_to_il(A,B)
np.testing.assert_allclose(pure, quick)
def test_einsum_ijk_ljk_to_ilk(self):
A = np.random.randn(150, 20, 5)
B = np.random.randn(150, 20, 5)
#B = A.copy()
pure = np.einsum('ijk,ljk->ilk', A, B)
quick = GPy.util.linalg.ijk_ljk_to_ilk(A,B)
np.testing.assert_allclose(pure, quick)

View file

@ -49,7 +49,7 @@ class LinkFunctionTests(np.testing.TestCase):
self.assertTrue(grad3.checkgrad(verbose=True))
if test_lim:
print "Testing limits"
print("Testing limits")
#Remove some otherwise we are too close to the limit for gradcheck to work effectively
lim_of_inf = lim_of_inf - 1e-4
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)

View file

@ -100,10 +100,10 @@ def block_dot(A, B, diagonal=False):
Dshape = D.shape
if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
print "Broadcasting, C: {} D:{}".format(C.shape, D.shape)
print("Broadcasting, C: {} D:{}".format(C.shape, D.shape))
return C*D
else:
print "Dotting, C: {} C:{}".format(C.shape, D.shape)
print("Dotting, C: {} C:{}".format(C.shape, D.shape))
return np.dot(C,D)
dot = np.vectorize(f, otypes = [np.object])
return dot(A,B)

View file

@ -1,14 +1,11 @@
# Copyright James Hensman and Max Zwiessele 2014
# Copyright James Hensman and Max Zwiessele 2014, 2015
# Licensed under the GNU GPL version 3.0
import numpy as np
from . import linalg
from .config import config
try:
from scipy import weave
except ImportError:
config.set('weave', 'working', 'False')
from . import choleskies_cython
def safe_root(N):
i = np.sqrt(N)
@ -17,125 +14,72 @@ def safe_root(N):
raise ValueError("N is not square!")
return j
def _flat_to_triang_weave(flat):
"""take a matrix N x D and return a M X M x D array where
N = M(M+1)/2
the lower triangular portion of the d'th slice of the result is filled by the d'th column of flat.
This is the weave implementation
"""
N, D = flat.shape
M = (-1 + safe_root(8*N+1))/2
ret = np.zeros((M, M, D))
flat = np.ascontiguousarray(flat)
code = """
int count = 0;
for(int m=0; m<M; m++)
{
for(int mm=0; mm<=m; mm++)
{
for(int d=0; d<D; d++)
{
ret[d + m*D*M + mm*D] = flat[count];
count++;
}
}
}
"""
weave.inline(code, ['flat', 'ret', 'D', 'M'])
return ret
def _flat_to_triang_pure(flat_mat):
N, D = flat_mat.shape
M = (-1 + safe_root(8*N+1))//2
ret = np.zeros((M, M, D))
count = 0
for m in range(M):
for mm in range(m+1):
for d in range(D):
ret.flat[d + m*D*M + mm*D] = flat_mat.flat[count];
ret = np.zeros((D, M, M))
for d in range(D):
count = 0
for m in range(M):
for mm in range(m+1):
ret[d,m, mm] = flat_mat[count, d];
count = count+1
return ret
if config.getboolean('weave', 'working'):
flat_to_triang = _flat_to_triang_weave
else:
flat_to_triang = _flat_to_triang_pure
def _flat_to_triang_cython(flat_mat):
N, D = flat_mat.shape
M = (-1 + safe_root(8*N+1))//2
return choleskies_cython.flat_to_triang(flat_mat, M)
def _triang_to_flat_weave(L):
M, _, D = L.shape
L = np.ascontiguousarray(L) # should do nothing if L was created by flat_to_triang
N = M*(M+1)/2
flat = np.empty((N, D))
code = """
int count = 0;
for(int m=0; m<M; m++)
{
for(int mm=0; mm<=m; mm++)
{
for(int d=0; d<D; d++)
{
flat[count] = L[d + m*D*M + mm*D];
count++;
}
}
}
"""
weave.inline(code, ['flat', 'L', 'D', 'M'])
return flat
def _triang_to_flat_pure(L):
M, _, D = L.shape
D, _, M = L.shape
N = M*(M+1)//2
flat = np.empty((N, D))
count = 0;
for m in range(M):
for mm in range(m+1):
for d in range(D):
flat.flat[count] = L.flat[d + m*D*M + mm*D];
for d in range(D):
count = 0;
for m in range(M):
for mm in range(m+1):
flat[count,d] = L[d, m, mm]
count = count +1
return flat
if config.getboolean('weave', 'working'):
triang_to_flat = _triang_to_flat_weave
else:
triang_to_flat = _triang_to_flat_pure
def _triang_to_flat_cython(L):
return choleskies_cython.triang_to_flat(L)
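A minimal round-trip sketch of the packing helpers above, assuming they are exposed as GPy.util.choleskies.flat_to_triang and triang_to_flat and that the new (D, M, M) slice layout from this commit is in use.

# Round-trip sketch for the packing helpers above; the import path
# GPy.util.choleskies and the (D, M, M) layout are assumptions based on this diff.
import numpy as np
from GPy.util import choleskies

M, D = 4, 3
N = M * (M + 1) // 2            # number of lower-triangular entries
flat = np.random.randn(N, D)    # one column per output dimension

L = choleskies.flat_to_triang(flat)    # shape (D, M, M), lower-triangular slices
flat2 = choleskies.triang_to_flat(L)   # back to (N, D)

assert L.shape == (D, M, M)
np.testing.assert_allclose(flat, flat2)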
def _backprop_gradient_pure(dL, L):
"""
Given the derivative of an objective fn with respect to the cholesky L,
compute the derivate with respect to the original matrix K, defined as
K = LL^T
where L was obtained by Cholesky decomposition
"""
dL_dK = np.tril(dL).copy()
N = L.shape[0]
for k in range(N - 1, -1, -1):
for j in range(k + 1, N):
for i in range(j, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2 * L[k, k])
return dL_dK
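A usage sketch of the Cholesky backprop routine above; the module path GPy.util.choleskies and the public name backprop_gradient are taken from this diff, and the snippet is illustrative rather than a correctness test.

# Usage sketch: propagate a gradient w.r.t. the Cholesky factor L back to K = LL^T.
import numpy as np
from GPy.util import choleskies

N = 5
A = np.random.randn(N, N)
K = A.dot(A.T) + N * np.eye(N)          # a positive-definite matrix
L = np.linalg.cholesky(K)

dF_dL = np.tril(np.random.randn(N, N))  # some gradient w.r.t. the factor L
dF_dK = choleskies.backprop_gradient(dF_dL, L)
print(dF_dK.shape)                      # (N, N), lower triangular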
def triang_to_cov(L):
return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])])
def multiple_dpotri_old(Ls):
M, _, D = Ls.shape
Kis = np.rollaxis(Ls, -1).copy()
[dpotri(Kis[i,:,:], overwrite_c=1, lower=1) for i in range(D)]
code = """
for(int d=0; d<D; d++)
{
for(int m=0; m<M; m++)
{
for(int mm=0; mm<m; mm++)
{
Kis[d*M*M + mm*M + m ] = Kis[d*M*M + m*M + mm];
}
}
}
"""
weave.inline(code, ['Kis', 'D', 'M'])
Kis = np.rollaxis(Kis, 0, 3) #wtf rollaxis?
return Kis
def multiple_dpotri(Ls):
return np.dstack([linalg.dpotri(np.asfortranarray(Ls[:,:,i]), lower=1)[0] for i in range(Ls.shape[-1])])
return np.array([linalg.dpotri(np.asfortranarray(Ls[i]), lower=1)[0] for i in range(Ls.shape[0])])
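A small check of multiple_dpotri against numpy.linalg.inv, assuming the new (D, M, M) layout of Cholesky slices introduced in this commit and that GPy's dpotri wrapper returns the full symmetrised inverse.

# Sketch: each slice Kinv[d] should equal inv(Ls[d] Ls[d]^T).
import numpy as np
from GPy.util import choleskies

D, M = 3, 4
Ls = np.zeros((D, M, M))
for d in range(D):
    A = np.random.randn(M, M)
    Ls[d] = np.linalg.cholesky(A.dot(A.T) + M * np.eye(M))

Kinv = choleskies.multiple_dpotri(Ls)   # assumed shape (D, M, M)
for d in range(D):
    np.testing.assert_allclose(Kinv[d], np.linalg.inv(Ls[d].dot(Ls[d].T)), atol=1e-8)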
def indexes_to_fix_for_low_rank(rank, size):
"""
work out which indexes of the flattened array should be fixed if we want the cholesky to represent a low rank matrix
Work out which indexes of the flattened array should be fixed if we want
the cholesky to represent a low rank matrix
"""
#first we'll work out what to keep, and then do the set difference.
@ -153,15 +97,11 @@ def indexes_to_fix_for_low_rank(rank, size):
return np.setdiff1d(np.arange((size**2+size)/2), keep)
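A quick, purely illustrative call showing how many packed entries get fixed for a given rank and size; no assumptions are made beyond the function's signature shown above.

# Illustrative only: count how many packed Cholesky entries would be fixed.
from GPy.util import choleskies

size, rank = 5, 2
fixed = choleskies.indexes_to_fix_for_low_rank(rank, size)
print(len(fixed), "of", (size * size + size) // 2, "packed entries would be fixed")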
#class cholchecker(GPy.core.Model):
#def __init__(self, L, name='cholchecker'):
#super(cholchecker, self).__init__(name)
#self.L = GPy.core.Param('L',L)
#self.link_parameter(self.L)
#def parameters_changed(self):
#LL = flat_to_triang(self.L)
#Ki = multiple_dpotri(LL)
#self.L.gradient = 2*np.einsum('ijk,jlk->ilk', Ki, LL)
#self._loglik = np.sum([np.sum(np.log(np.abs(np.diag()))) for i in range(self.L.shape[-1])])
#
if config.getboolean('cython', 'working'):
triang_to_flat = _triang_to_flat_cython
flat_to_triang = _flat_to_triang_cython
backprop_gradient = choleskies_cython.backprop_gradient_par_c
else:
backprop_gradient = _backprop_gradient_pure
triang_to_flat = _triang_to_flat_pure
flat_to_triang = _flat_to_triang_pure

GPy/util/choleskies_cython.c (new file, 21096 lines)

File diff suppressed because it is too large.

View file

@ -0,0 +1,107 @@
#cython: wraparound=False
#cython: boundscheck=False
#cython: nonecheck=False
# Copyright James Hensman and Alan Saul 2015
import numpy as np
from cython.parallel import prange, parallel
cimport numpy as np
def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
"""take a matrix N x D and return a D X M x M array where
N = M(M+1)/2
the lower triangular portion of the d'th slice of the result is filled by the d'th column of flat.
"""
cdef int D = flat.shape[1]
cdef int N = flat.shape[0]
cdef int count = 0
cdef np.ndarray[double, ndim=3] ret = np.zeros((D, M, M))
cdef int d, m, mm
for d in range(D):
count = 0
for m in range(M):
for mm in range(m+1):
ret[d, m, mm] = flat[count,d]
count += 1
return ret
def triang_to_flat(np.ndarray[double, ndim=3] L):
cdef int D = L.shape[0]
cdef int M = L.shape[1]
cdef int N = M*(M+1)/2
cdef int count = 0
cdef np.ndarray[double, ndim=2] flat = np.empty((N, D))
cdef int d, m, mm
for d in range(D):
count = 0
for m in range(M):
for mm in range(m+1):
flat[count,d] = L[d, m, mm]
count += 1
return flat
def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL).copy()
cdef int N = L.shape[0]
cdef int k, j, i
for k in range(N - 1, -1, -1):
for j in range(k + 1, N):
for i in range(j, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
return dL_dK
def backprop_gradient_par(double[:,:] dL, double[:,:] L):
cdef double[:,:] dL_dK = np.tril(dL).copy()
cdef int N = L.shape[0]
cdef int k, j, i
for k in range(N - 1, -1, -1):
with nogil, parallel():
for i in prange(k + 1, N):
for j in range(k+1, i+1):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
for j in range(i, N):
dL_dK[i, k] -= dL_dK[j, i] * L[j, k]
for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
return dL_dK
#here's a pure C version...
cdef extern from "cholesky_backprop.h" nogil:
void chol_backprop(int N, double* dL, double* L)
def backprop_gradient_par_c(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
cdef int N = L.shape[0]
with nogil:
chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
return dL_dK
cdef extern from "cholesky_backprop.h" nogil:
void old_chol_backprop(int N, double* dL, double* L)
def backprop_gradient_par_c_old(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
cdef int N = L.shape[0]
with nogil:
old_chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
return dL_dK

View file

@ -0,0 +1,51 @@
#include <cblas.h>
void chol_backprop(int N, double* dL, double* L){
//at the input to this fn, dL is df_dL; after this fn is complete, dL is df_dK
int i,k;
dL[N*N - 1] /= (2. * L[N*N - 1]);
for(k=N-2;k>(-1);k--){
cblas_dsymv(CblasRowMajor, CblasLower,
N-k-1, -1,
&dL[(N*(k+1) + k+1)],N,
&L[k*N+k+1],1,
1, &dL[N*(k+1)+k], N);
for(i=0;i<(N-k-1); i++){
dL[N*(k+1+i)+k] -= dL[N*(k+1)+k+i*(N+1)+1] * L[k*N+k+1+i];
}
cblas_dscal(N-k-1, 1.0/L[k*N+k], &dL[(k+1)*N+k], N);
dL[k*N + k] -= cblas_ddot(N-k-1, &dL[(k+1)*N+k], N, &L[k*N+k+1], 1);
dL[k*N + k] /= (2.0 * L[k*N + k]);
}
}
double mydot(int n, double* a, int stride_a, double* b, int stride_b){
double ret = 0;
for(int i=0; i<n; i++){
ret += a[i*stride_a]*b[i*stride_b];
}
return ret;
}
void old_chol_backprop(int N, double* dL, double* U){
//at the input to this fn, dL is df_dL; after this fn is complete, dL is df_dK
int iN, kN,i,j,k;
dL[N*N-1] /= (2. * U[N*N-1]);
for(k=N-2;k>(-1);k--){
kN = k*N;
#pragma omp parallel for private(i,iN)
for(i=k+1; i<N; i++){
iN = i*N;
dL[iN+k] -= mydot(i-k, &dL[iN+k+1], 1, &U[kN+k+1], 1);
dL[iN+k] -= mydot(N-i, &dL[iN+i], N, &U[kN+i], 1);
}
for(i=(k + 1); i<N; i++){
iN = i*N;
dL[iN + k] /= U[kN + k];
dL[kN + k] -= U[kN + i] * dL[iN + k];
}
dL[kN + k] /= (2. * U[kN + k]);
}
}

View file

@ -0,0 +1,5 @@
#include <cblas.h>
void dsymv(int N, double*A, double*b, double*y);
double mydot(int n, double* a, int stride_a, double* b, int stride_b);
void chol_backprop(int N, double* dL, double* L);

View file

@ -15,11 +15,7 @@ import warnings
import os
from .config import config
import logging
try:
from scipy import weave
except ImportError:
config.set('weave', 'working', 'False')
from . import linalg_cython
_scipyversion = np.float64((scipy.__version__).split('.')[:2])
@ -422,114 +418,33 @@ def DSYR(*args, **kwargs):
def symmetrify(A, upper=False):
"""
Take the square matrix A and make it symmetrical by copying elements from the lower half to the upper
Take the square matrix A and make it symmetrical by copying elements from
the lower half to the upper
works IN PLACE.
note: tries to use weave, falls back to a slower numpy version
note: tries to use cython, falls back to a slower numpy version
"""
if config.getboolean('weave', 'working'):
try:
symmetrify_weave(A, upper)
except:
print("\n Weave compilation failed. Falling back to (slower) numpy implementation\n")
config.set('weave', 'working', 'False')
symmetrify_numpy(A, upper)
if config.getboolean('cython', 'working'):
_symmetrify_cython(A, upper)
else:
symmetrify_numpy(A, upper)
_symmetrify_numpy(A, upper)
def symmetrify_weave(A, upper=False):
"""
Take the square matrix A and make it symmetrical by copying elements from the lower half to the upper
def _symmetrify_cython(A, upper=False):
return linalg_cython.symmetrify(A, upper)
works IN PLACE.
"""
N, M = A.shape
assert N == M
c_contig_code = """
int iN;
for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){
A[i+j*N] = A[iN+j];
}
}
"""
f_contig_code = """
int iN;
for (int i=1; i<N; i++){
iN = i*N;
for (int j=0; j<i; j++){
A[iN+j] = A[i+j*N];
}
}
"""
N = int(N) # for safe type casting
if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code, ['A', 'N'], extra_compile_args=['-O3'])
else:
if upper:
tmp = np.tril(A.T)
else:
tmp = np.tril(A)
A[:] = 0.0
A += tmp
A += np.tril(tmp, -1).T
def symmetrify_numpy(A, upper=False):
"""
Force a matrix to be symmetric
"""
def _symmetrify_numpy(A, upper=False):
triu = np.triu_indices_from(A,k=1)
if upper:
A.T[triu] = A[triu]
else:
A[triu] = A.T[triu]
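A short in-place usage sketch; GPy.util.linalg.symmetrify dispatches to the Cython kernel when available and otherwise to the numpy fallback above.

# In-place symmetrisation sketch.
import numpy as np
from GPy.util.linalg import symmetrify

A = np.tril(np.random.randn(4, 4))   # only the lower triangle is populated
symmetrify(A)                        # copy the lower half onto the upper half, in place
np.testing.assert_allclose(A, A.T)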
#This function appears to be unused. Its use of weave makes it problematic
#Commenting out for now
#def cholupdate(L, x):
# """
# update the LOWER cholesky factor of a pd matrix IN PLACE
#
# if L is the lower chol. of K, then this function computes L\_
# where L\_ is the lower chol of K + x*x^T
# """
# support_code = """
# #include <math.h>
# """
# code = """
# double r,c,s;
# int j,i;
# for(j=0; j<N; j++){
# r = sqrt(L(j,j)*L(j,j) + x(j)*x(j));
# c = r / L(j,j);
# s = x(j) / L(j,j);
# L(j,j) = r;
# for (i=j+1; i<N; i++){
# L(i,j) = (L(i,j) + s*x(i))/c;
# x(i) = c*x(i) - s*L(i,j);
# }
# }
# """
# x = x.copy()
# N = x.size
# weave.inline(code, support_code=support_code, arg_names=['N', 'L', 'x'], type_converters=weave.converters.blitz)
def backsub_both_sides(L, X, transpose='left'):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
"""
Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky
"""
if transpose == 'left':
tmp, _ = dtrtrs(L, X, lower=1, trans=1)
return dtrtrs(L, tmp.T, lower=1, trans=1)[0].T
@ -537,3 +452,27 @@ def backsub_both_sides(L, X, transpose='left'):
tmp, _ = dtrtrs(L, X, lower=1, trans=0)
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
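A sketch verifying backsub_both_sides against explicit inverses for a small symmetric X; illustrative only, since dense inverses are exactly what the triangular solves avoid in practice.

# Check L^-T X L^-1 (transpose='left') and L^-1 X L^-T (transpose='right').
import numpy as np
from GPy.util.linalg import backsub_both_sides

N = 4
A = np.random.randn(N, N)
K = A.dot(A.T) + N * np.eye(N)
L = np.linalg.cholesky(K)
X = np.random.randn(N, N)
X = X + X.T                                   # the helper assumes X is symmetric

Li = np.linalg.inv(L)
np.testing.assert_allclose(backsub_both_sides(L, X),
                           Li.T.dot(X).dot(Li), atol=1e-8)
np.testing.assert_allclose(backsub_both_sides(L, X, transpose='right'),
                           Li.dot(X).dot(Li.T), atol=1e-8)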
def ij_jlk_to_ilk(A, B):
"""
Faster version of einsum 'ij,jlk->ilk'
"""
return A.dot(B.reshape(B.shape[0], -1)).reshape(A.shape[0], B.shape[1], B.shape[2])
def ijk_jlk_to_il(A, B):
"""
Faster version of einsum einsum('ijk,jlk->il', A,B)
"""
res = np.zeros((A.shape[0], B.shape[1]))
[np.add(np.dot(A[:,:,k], B[:,:,k]), res, out=res) for k in range(B.shape[-1])]
return res
def ijk_ljk_to_ilk(A, B):
"""
Faster version of einsum np.einsum('ijk,ljk->ilk', A, B)
I.e A.dot(B.T) for every dimension
"""
res = np.zeros((A.shape[-1], A.shape[0], B.shape[0]))
[np.dot(A[:,:,i], B[:,:,i].T, out=res[i,:,:]) for i in range(A.shape[-1])]
res = res.swapaxes(0, 2).swapaxes(0,1)
return res
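The three helpers above mirror specific einsum contractions; the unit tests earlier in this diff check two of them, and the following sketch exercises all three.

# Sketch: the helpers reproduce the einsum contractions named in their docstrings.
import numpy as np
from GPy.util import linalg

A2 = np.random.randn(6, 7)
A3 = np.random.randn(6, 7, 3)
B3 = np.random.randn(7, 8, 3)
C3 = np.random.randn(9, 7, 3)

np.testing.assert_allclose(linalg.ij_jlk_to_ilk(A2, B3),
                           np.einsum('ij,jlk->ilk', A2, B3))
np.testing.assert_allclose(linalg.ijk_jlk_to_il(A3, B3),
                           np.einsum('ijk,jlk->il', A3, B3))
np.testing.assert_allclose(linalg.ijk_ljk_to_ilk(A3, C3),
                           np.einsum('ijk,ljk->ilk', A3, C3))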

GPy/util/linalg_cython.c (new file, 6191 lines)

File diff suppressed because it is too large.

View file

@ -0,0 +1,34 @@
cimport numpy as np
from cpython cimport bool
import cython
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def symmetrify(np.ndarray[double, ndim=2] A, bool upper):
cdef int N = A.shape[0]
if not upper:
for i in xrange(N):
for j in xrange(i):
A[j, i] = A[i, j]
else:
for j in xrange(N):
for i in xrange(j):
A[j, i] = A[i, j]
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
def cholupdate(np.ndarray[double, ndim=1] x, np.ndarray[double, ndim=2] L, int N):
cdef double r
cdef double c
cdef double s
for j in xrange(N):
r = np.sqrt(L[j,j]*L[j,j] + x[j]*x[j])
c = r / L[j,j]
s = x[j] / L[j,j]
L[j,j] = r
for i in xrange(j):
L[i,j] = (L[i,j] + s*x[i])/c
x[i] = c*x[i] - s*L[i,j];
r = np.sqrt(L[j,j])
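For reference, here is a pure-numpy sketch of the classical rank-1 Cholesky update that this routine targets, verified by recomputing the factor from scratch; it does not call the Cython version.

# Pure-numpy sketch of the classical rank-1 lower-Cholesky update.
import numpy as np

def cholupdate_numpy(L, x):
    # return an updated copy of the lower Cholesky factor of K so that it
    # becomes the factor of K + x x^T
    L, x = L.copy(), x.copy()
    N = x.size
    for j in range(N):
        r = np.sqrt(L[j, j]**2 + x[j]**2)
        c, s = r / L[j, j], x[j] / L[j, j]
        L[j, j] = r
        for i in range(j + 1, N):
            L[i, j] = (L[i, j] + s * x[i]) / c
            x[i] = c * x[i] - s * L[i, j]
    return L

N = 5
A = np.random.randn(N, N)
K = A.dot(A.T) + N * np.eye(N)
x = np.random.randn(N)
L = np.linalg.cholesky(K)
np.testing.assert_allclose(cholupdate_numpy(L, x),
                           np.linalg.cholesky(K + np.outer(x, x)), atol=1e-8)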

View file

@ -42,9 +42,6 @@ def chain_1(df_dg, dg_dx):
"""
if np.all(dg_dx==1.):
return df_dg
if len(df_dg) > 1 and len(df_dg.shape)>1 and df_dg.shape[-1] > 1:
#import ipdb; ipdb.set_trace() # XXX BREAKPOINT
raise NotImplementedError('Not implemented for matrices yet')
return df_dg * dg_dx
def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
@ -56,8 +53,6 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
"""
if np.all(dg_dx==1.) and np.all(d2g_dx2 == 0):
return d2f_dg2
if len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1:
raise NotImplementedError('Not implemented for matrices yet')
dg_dx_2 = np.clip(dg_dx, -np.inf, _lim_val_square)**2
#dg_dx_2 = dg_dx**2
return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
@ -71,11 +66,7 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
"""
if np.all(dg_dx==1.) and np.all(d2g_dx2==0) and np.all(d3g_dx3==0):
return d3f_dg3
if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
raise NotImplementedError('Not implemented for matrices yet')
dg_dx_3 = np.clip(dg_dx, -np.inf, _lim_val_cube)**3
#dg_dx_3 = dg_dx**3
return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
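A scalar sanity check of the chain helpers with f = exp and g = x**2, so that f(g(x)) = exp(x**2); the module path GPy.util.misc is an assumption based on the surrounding functions in this diff.

# Check chain_1/2/3 against the hand-derived derivatives of exp(x**2).
import numpy as np
from GPy.util.misc import chain_1, chain_2, chain_3

x = np.array([0.3])
g, dg, d2g, d3g = x**2, 2*x, np.array([2.]), np.array([0.])
df = d2f = d3f = np.exp(g)              # every derivative of exp is exp

np.testing.assert_allclose(chain_1(df, dg), 2*x*np.exp(x**2))
np.testing.assert_allclose(chain_2(d2f, dg, df, d2g), (2 + 4*x**2)*np.exp(x**2))
np.testing.assert_allclose(chain_3(d3f, dg, d2f, d2g, df, d3g), (12*x + 8*x**3)*np.exp(x**2))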
def opt_wrapper(m, **kwargs):
@ -133,10 +124,11 @@ def kmm_init(X, m = 10):
### make a parameter to its corresponding array:
def param_to_array(*param):
"""
Convert an arbitrary number of parameters to :class:ndarray class objects. This is for
converting parameter objects to numpy arrays, when using scipy.weave.inline routine.
In scipy.weave.blitz there is no automatic array detection (even when the array inherits
from :class:ndarray)"""
Convert an arbitrary number of parameters to :class:ndarray class objects.
This is for converting parameter objects to numpy arrays, when using
scipy.weave.inline routine. In scipy.weave.blitz there is no automatic
array detection (even when the array inherits from :class:ndarray)
"""
import warnings
warnings.warn("Please use param.values, as this function will be deprecated in the next release.", DeprecationWarning)
assert len(param) > 0, "At least one parameter needed"

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from GPy.core.parameterization import Parameterized, Param
from ..core.parameterization import Parameterized, Param
from ..core.parameterization.transformations import Logexp
class WarpingFunction(Parameterized):

View file

@ -6,3 +6,6 @@ include *.cfg
recursive-include doc *.cfg
include *.json
recursive-include doc *.json
recursive-include GPy *.c
recursive-include GPy *.so
recursive-include GPy *.pyx

View file

@ -13,7 +13,6 @@ Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GP
### Python 3 Compatibility
Work is underway to make GPy run on Python 3.
* Python 2.x compatibility is currently broken in this fork
* All tests in the testsuite now run on Python3.
To see this for yourself, in Ubuntu 14.04, you can do
@ -21,12 +20,17 @@ To see this for yourself, in Ubuntu 14.04, you can do
git clone https://github.com/mikecroucher/GPy.git
cd GPy
git checkout devel
python3 setup.py build_ext --inplace
nosetests3 GPy/testing
nosetests3 is Ubuntu's way of referring to the Python 3 version of nosetests. You install it with
sudo apt-get install python3-nose
The command `python3 setup.py build_ext --inplace` builds the Cython extensions. If it doesn't work, you may need to install the Python 3 development headers:
sudo apt-get install python3-dev
* Test coverage is below 100%, so more work remains; more tests and examples are needed.
* All weave functions not covered by the test suite are *simply commented out*. Equivalents can be added later as tests become available.
* A set of benchmarks would be useful!

View file

@ -2,7 +2,8 @@
# -*- coding: utf-8 -*-
import os
from setuptools import setup
from setuptools import setup, Extension
import numpy as np
# Version number
version = '0.6.1'
@ -10,6 +11,28 @@ version = '0.6.1'
def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read()
#compile_flags = ["-march=native", '-fopenmp', '-O3', ]
compile_flags = [ '-fopenmp', '-O3', ]
ext_mods = [Extension(name='GPy.kern._src.stationary_cython',
sources=['GPy/kern/_src/stationary_cython.c','GPy/kern/_src/stationary_utils.c'],
include_dirs=[np.get_include()],
extra_compile_args=compile_flags,
extra_link_args = ['-lgomp']),
Extension(name='GPy.util.choleskies_cython',
sources=['GPy/util/choleskies_cython.c', 'GPy/util/cholesky_backprop.c'],
include_dirs=[np.get_include()],
extra_link_args = ['-lgomp', '-lblas'],
extra_compile_args=compile_flags+['-std=c99']),
Extension(name='GPy.util.linalg_cython',
sources=['GPy/util/linalg_cython.c'],
include_dirs=[np.get_include()],
extra_compile_args=compile_flags),
Extension(name='GPy.kern._src.coregionalize_cython',
sources=['GPy/kern/_src/coregionalize_cython.c'],
include_dirs=[np.get_include()],
extra_compile_args=compile_flags)]
setup(name = 'GPy',
version = version,
author = read('AUTHORS.txt'),
@ -18,6 +41,7 @@ setup(name = 'GPy',
license = "BSD 3-clause",
keywords = "machine-learning gaussian-processes kernels",
url = "http://sheffieldml.github.com/GPy/",
ext_modules = ext_mods,
packages = ["GPy.models",
"GPy.inference.optimization",
"GPy.inference.mcmc",