Change crescent data to optimize with .optimize()

Neil Lawrence 2015-07-19 11:27:16 +02:00
commit 7c8176e5fd
47 changed files with 30168 additions and 1116 deletions

View file

@@ -89,7 +89,6 @@ class GP(Model):
assert mean_function.output_dim == self.output_dim
self.link_parameter(mean_function)
#find a sensible inference method
logger.info("initializing inference method")
if inference_method is None:
@@ -208,6 +207,7 @@ class GP(Model):
Kxx = kern.Kdiag(_Xnew)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
+ var[var<0.] = 0.
#force mu to be a column vector
if len(mu.shape)==1: mu = mu[:,None]
@@ -229,13 +229,14 @@ class GP(Model):
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
kern). This is useful for examining e.g. subprocesses.
- :returns: (mean, var, lower_upper):
+ :returns: (mean, var):
mean: posterior mean, a Numpy array, Nnew x self.input_dim
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
- lower_upper: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
+ Note: If you want the predictive quantiles (e.g. the 95% confidence interval) use :py:func:`~GPy.core.gp.GP.predict_quantiles`.
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
@@ -255,7 +256,7 @@ class GP(Model):
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
:type quantiles: tuple
:returns: list of quantiles for each X and predictive quantiles for interval combination
- :rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
+ :rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
if self.normalizer is not None:

View file

@@ -76,7 +76,7 @@ class Model(Parameterized):
jobs = []
pool = mp.Pool(processes=num_processes)
for i in range(num_restarts):
- self.randomize()
+ if i>0: self.randomize()
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job)
@@ -90,7 +90,7 @@ class Model(Parameterized):
for i in range(num_restarts):
try:
if not parallel:
- self.randomize()
+ if i>0: self.randomize()
self.optimize(**kwargs)
else:
self.optimization_runs.append(jobs[i].get())
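A small usage sketch of the behaviour this change introduces (assumes GPy installed; data is illustrative): with `if i>0`, the first restart now starts from the model's current parameters and only later restarts call self.randomize().

import numpy as np
import GPy

X = np.random.rand(20, 1)
Y = np.sin(X) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)
m.optimize_restarts(num_restarts=3)  # run 0: current params; runs 1-2: randomized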

View file

@@ -5,7 +5,7 @@ import numpy
from numpy.lib.function_base import vectorize
from .lists_and_dicts import IntArrayDict
from functools import reduce
- from transformations import Transformation
+ from .transformations import Transformation
def extract_properties_to_index(index, props):
prop_index = dict()

View file

@@ -38,6 +38,11 @@ class Param(Parameterizable, ObsAr):
Fixing parameters will fix them to the value they are right now. If you change
the fixed value, it will be fixed to the new value!
+ Important Note:
+ Multilevel indexing (e.g. self[:2][1:]) is not supported and might lead to unexpected behaviour.
+ Try to index in one go, using boolean indexing or the numpy builtin
+ np.index_exp function.
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
"""

View file

@@ -430,23 +430,38 @@ class Indexable(Nameable, Updateable):
def log_prior(self):
"""evaluate the prior"""
- if self.priors.size > 0:
- x = self.param_array
- #py3 fix
- #return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
- return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
- return 0.
+ if self.priors.size == 0:
+ return 0.
+ x = self.param_array
+ #evaluate the prior log densities
+ log_p = reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
+ #account for the transformation by evaluating the log Jacobian (where things are transformed)
+ log_j = 0.
+ priored_indexes = np.hstack([i for p, i in self.priors.items()])
+ for c,j in self.constraints.items():
+ if not isinstance(c, Transformation):continue
+ for jj in j:
+ if jj in priored_indexes:
+ log_j += c.log_jacobian(x[jj])
+ return log_p + log_j
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
- if self.priors.size > 0:
- x = self.param_array
- ret = np.zeros(x.size)
- #py3 fix
- #[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
- [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
- return ret
- return 0.
+ if self.priors.size == 0:
+ return 0.
+ x = self.param_array
+ ret = np.zeros(x.size)
+ #compute derivative of prior density
+ [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
+ #add in jacobian derivatives if transformed
+ priored_indexes = np.hstack([i for p, i in self.priors.items()])
+ for c,j in self.constraints.items():
+ if not isinstance(c, Transformation):continue
+ for jj in j:
+ if jj in priored_indexes:
+ ret[jj] += c.log_jacobian_grad(x[jj])
+ return ret
#===========================================================================
# Tie parameters together
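The added Jacobian terms are the standard change-of-variables correction for a prior placed on a constrained parameter theta = f(eta) while optimization runs on the unconstrained eta. A minimal sketch of that correction, assuming a Gaussian prior and the Logexp transform (names here are illustrative, not GPy API):

import numpy as np
from scipy.stats import norm

# With a prior p(theta) on theta = f(eta) and optimization in eta, the
# density in eta picks up the Jacobian: log p(eta) = log p(theta) + log|df/deta|.
def log_prior_in_eta(eta, loc=1., scale=1.):
    theta = np.log1p(np.exp(eta))                     # Logexp: theta = log(1 + e^eta)
    log_p = norm.logpdf(theta, loc=loc, scale=scale)  # prior log density at theta
    log_j = np.log(np.exp(theta) - 1.) - theta        # log|df/deta| at f(eta) = theta
    return log_p + log_j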

View file

@@ -6,10 +6,10 @@ import numpy; np = numpy
import itertools
from re import compile, _pattern_type
from .param import ParamConcatenation
- from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
+ from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging
- from index_operations import ParameterIndexOperationsView
+ from .index_operations import ParameterIndexOperationsView
logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type):

View file

@@ -758,12 +758,12 @@ class DGPLVM_Lamda(Prior, Parameterized):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.lamda = lamda
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
self.lamda = Param('lamda', np.diag(lamda))
self.link_parameter(self.lamda)
def get_class_label(self, y):
@@ -789,7 +789,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
@@ -899,8 +899,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@@ -910,14 +910,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates the derivative of the log of the prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@@ -934,7 +934,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
@@ -951,14 +951,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
# Because of GPy we need to transpose our matrix so that it gets the same shape as our matrix (denominator layout!!!)
DPxprim_Dx = DPxprim_Dx.T
DPxprim_Dlamda = DPx_Dx.dot(x)
# Because of GPy we need to transpose our matrix so that it gets the same shape as our matrix (denominator layout!!!)
DPxprim_Dlamda = DPxprim_Dlamda.T
self.lamda.gradient = np.diag(DPxprim_Dlamda)
# print DPxprim_Dx
return DPxprim_Dx
# def frb(self, x):
@@ -1139,8 +1139,8 @@ class DGPLVM_T(Prior):
# This function calculates the log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
@@ -1156,11 +1156,11 @@ class DGPLVM_T(Prior):
# This function calculates the derivative of the log of the prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)

View file

@@ -31,6 +31,16 @@ class Transformation(object):
raise NotImplementedError
def finv(self, model_param):
raise NotImplementedError
+ def log_jacobian(self, model_param):
+ """
+ compute the log of the Jacobian of f, evaluated at f(x) = model_param
+ """
+ raise NotImplementedError
+ def log_jacobian_grad(self, model_param):
+ """
+ compute the derivative of the log of the Jacobian of f, evaluated at f(x) = model_param
+ """
+ raise NotImplementedError
def gradfactor(self, model_param, dL_dmodel_param):
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
@@ -74,9 +84,33 @@ class Logexp(Transformation):
if np.any(f < 0.):
print("Warning: changing parameters to satisfy constraints")
return np.abs(f)
+ def log_jacobian(self, model_param):
+ return np.where(model_param>_lim_val, model_param, np.log(np.exp(model_param+1e-20) - 1.)) - model_param
+ def log_jacobian_grad(self, model_param):
+ return 1./(np.exp(model_param)-1.)
def __str__(self):
return '+ve'
+ class Exponent(Transformation):
+ domain = _POSITIVE
+ def f(self, x):
+ return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
+ def finv(self, x):
+ return np.log(x)
+ def gradfactor(self, f, df):
+ return np.einsum('i,i->i', df, f)
+ def initialize(self, f):
+ if np.any(f < 0.):
+ print("Warning: changing parameters to satisfy constraints")
+ return np.abs(f)
+ def log_jacobian(self, model_param):
+ return np.log(model_param)
+ def log_jacobian_grad(self, model_param):
+ return 1./model_param
+ def __str__(self):
+ return '+ve'
class NormalTheta(Transformation):
"Do not use, not officially supported!"
@@ -417,22 +451,6 @@ class LogexpClipped(Logexp):
def __str__(self):
return '+ve_c'
- class Exponent(Transformation):
- # TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
- domain = _POSITIVE
- def f(self, x):
- return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
- def finv(self, x):
- return np.log(x)
- def gradfactor(self, f, df):
- return np.einsum('i,i->i', df, f)
- def initialize(self, f):
- if np.any(f < 0.):
- print("Warning: changing parameters to satisfy constraints")
- return np.abs(f)
- def __str__(self):
- return '+ve'
class NegativeExponent(Exponent):
domain = _NEGATIVE
def f(self, x):

View file

@@ -36,8 +36,9 @@ class NormalPrior(VariationalPrior):
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
class SpikeAndSlabPrior(VariationalPrior):
- def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw):
+ def __init__(self, pi=None, learnPi=False, variance = 1.0, group_spike=False, name='SpikeAndSlabPrior', **kw):
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
+ self.group_spike = group_spike
self.variance = Param('variance',variance)
self.learnPi = learnPi
if learnPi:
@@ -50,7 +51,10 @@ class SpikeAndSlabPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
- gamma = variational_posterior.gamma.values
+ if self.group_spike:
+ gamma = variational_posterior.gamma.values[0]
+ else:
+ gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
@@ -65,14 +69,21 @@ class SpikeAndSlabPrior(VariationalPrior):
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
- gamma = variational_posterior.gamma.values
+ if self.group_spike:
+ gamma = variational_posterior.gamma.values[0]
+ else:
+ gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
- variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+ if self.group_spike:
+ dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))/variational_posterior.num_data
+ else:
+ dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))
+ variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi:
@@ -154,13 +165,31 @@ class SpikeAndSlabPosterior(VariationalPosterior):
'''
The SpikeAndSlab distribution for variational approximations.
'''
- def __init__(self, means, variances, binary_prob, name='latent space'):
+ def __init__(self, means, variances, binary_prob, group_spike=False, sharedX=False, name='latent space'):
"""
binary_prob : the probability of the distribution on the slab part.
"""
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
- self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
- self.link_parameter(self.gamma)
+ self.group_spike = group_spike
+ self.sharedX = sharedX
+ if sharedX:
+ self.mean.fix(warning=False)
+ self.variance.fix(warning=False)
+ if group_spike:
+ self.gamma_group = Param("binary_prob_group",binary_prob.mean(axis=0),Logistic(1e-10,1.-1e-10))
+ self.gamma = Param("binary_prob",binary_prob, __fixed__)
+ self.link_parameters(self.gamma_group,self.gamma)
+ else:
+ self.gamma = Param("binary_prob",binary_prob,Logistic(1e-10,1.-1e-10))
+ self.link_parameter(self.gamma)
+ def propogate_val(self):
+ if self.group_spike:
+ self.gamma.values[:] = self.gamma_group.values
+ def collate_gradient(self):
+ if self.group_spike:
+ self.gamma_group.gradient = self.gamma.gradient.reshape(self.gamma.shape).sum(axis=0)
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
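A minimal NumPy sketch (names hypothetical) of what the group-spike tying above does: one switch probability per latent dimension is shared by all N points, so propogate_val broadcasts it row-wise and collate_gradient sums the per-point gradients back, which is just the chain rule of the tiling.

import numpy as np

N, Q = 4, 3
gamma_group = np.random.rand(Q)           # binary_prob_group, shape (Q,)
gamma = np.tile(gamma_group, (N, 1))      # propogate_val: broadcast to (N, Q)
grad_gamma = np.random.randn(N, Q)        # gradient w.r.t. the tiled gamma
grad_group = grad_gamma.sum(axis=0)       # collate_gradient: back to (Q,)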
@@ -179,15 +208,15 @@ class SpikeAndSlabPosterior(VariationalPosterior):
n.parameters[dc['variance']._parent_index_] = dc['variance']
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n._gradient_array_ = None
- oversize = self.size - self.mean.size - self.variance.size
+ oversize = self.size - self.mean.size - self.variance.size - self.gamma.size
- n.size = n.mean.size + n.variance.size + oversize
+ n.size = n.mean.size + n.variance.size + n.gamma.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
- return super(VariationalPrior, self).__getitem__(s)
+ return super(SpikeAndSlabPosterior, self).__getitem__(s)
def plot(self, *args, **kwargs):
"""

View file

@@ -133,7 +133,7 @@ class SparseGP(GP):
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
- for i in range(var.shape[1]):
+ for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:

View file

@@ -46,7 +46,7 @@ class SVGP(SparseGP):
num_latent_functions = Y.shape[1]
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
- chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,num_latent_functions)))
+ chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[None,:,:], (num_latent_functions, 1,1)))
self.chol = Param('q_u_chol', chol)
self.link_parameter(self.chol)
self.link_parameter(self.m)
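A shape sketch of the new initialization layout: the stack of identity Cholesky factors is now stored as (num_latent, M, M) rather than (M, M, num_latent), matching the row-major indexing L[i,:,:] used by the inference code (GPy's choleskies.triang_to_flat then packs each triangle into a flat vector).

import numpy as np

M, num_latent = 3, 2
chol_stack = np.tile(np.eye(M)[None, :, :], (num_latent, 1, 1))  # (num_latent, M, M)
assert chol_stack.shape == (num_latent, M, M)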

View file

@@ -5,9 +5,10 @@ from __future__ import print_function
import numpy as np
import sys
import time
+ import datetime
def exponents(fnow, current_grad):
- exps = [np.abs(np.float(fnow)), current_grad]
+ exps = [np.abs(np.float(fnow)), 1 if current_grad is np.nan else current_grad]
return np.sign(exps) * np.log10(exps).astype(int)
class VerboseOptimization(object):
@@ -23,6 +24,7 @@ class VerboseOptimization(object):
self.model.add_observer(self, self.print_status)
self.status = 'running'
self.clear = clear_after_finish
+ self.deltat = .2
self.update()
@@ -44,25 +46,25 @@ class VerboseOptimization(object):
self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
display(self.hor_align)
try:
self.text.set_css('width', '100%')
left_col.set_css({
'padding': '2px',
'width': "100%",
})
right_col.set_css({
'padding': '2px',
})
self.hor_align.set_css({
'width': "100%",
})
self.hor_align.remove_class('vbox')
self.hor_align.add_class('hbox')
left_col.add_class("box-flex1")
right_col.add_class('box-flex0')
@@ -74,16 +76,31 @@ class VerboseOptimization(object):
else:
self.exps = exponents(self.fnow, self.current_gradient)
print('Running {} Code:'.format(self.opt_name))
- print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters))
+ print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "runtime", mi=self.len_maxiters))
def __enter__(self):
self.start = time.time()
return self
- def print_out(self):
+ def print_out(self, seconds):
+ if seconds<60:
+ ms = (seconds%1)*100
+ self.timestring = "{s:0>2d}s{ms:0>2d}".format(s=int(seconds), ms=int(ms))
+ else:
+ m, s = divmod(seconds, 60)
+ if m>59:
+ h, m = divmod(m, 60)
+ if h>23:
+ d, h = divmod(h, 24)
+ self.timestring = '{d:0>2d}d{h:0>2d}h{m:0>2d}'.format(m=int(m), h=int(h), d=int(d))
+ else:
+ self.timestring = '{h:0>2d}h{m:0>2d}m{s:0>2d}'.format(m=int(m), s=int(s), h=int(h))
+ else:
+ ms = (seconds%1)*100
+ self.timestring = '{m:0>2d}m{s:0>2d}s{ms:0>2d}'.format(m=int(m), s=int(s), ms=int(ms))
if self.ipython_notebook:
names_vals = [['optimizer', "{:s}".format(self.opt_name)],
- ['runtime [s]', "{:> g}".format(time.time()-self.start)],
+ ['runtime', "{:>s}".format(self.timestring)],
['evaluation', "{:>0{l}}".format(self.iteration, l=self.len_maxiters)],
['objective', "{: > 12.3E}".format(self.fnow)],
['||gradient||', "{: >+12.3E}".format(float(self.current_gradient))],
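For illustration, a standalone rendition of the new timestring rules with sample outputs (format_runtime is hypothetical, a sketch of the logic above):

def format_runtime(seconds):
    # s/ms under a minute, m/s/ms under an hour, h/m/s under a day, d/h/m beyond
    if seconds < 60:
        return "{:0>2d}s{:0>2d}".format(int(seconds), int((seconds % 1) * 100))
    m, s = divmod(seconds, 60)
    if m > 59:
        h, m = divmod(m, 60)
        if h > 23:
            d, h = divmod(h, 24)
            return "{:0>2d}d{:0>2d}h{:0>2d}".format(int(d), int(h), int(m))
        return "{:0>2d}h{:0>2d}m{:0>2d}".format(int(h), int(m), int(s))
    return "{:0>2d}m{:0>2d}s{:0>2d}".format(int(m), int(s), int((seconds % 1) * 100))

print(format_runtime(75.3))   # 01m15s30
print(format_runtime(3700))   # 01h01m40
print(format_runtime(90000))  # 01d01h00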
@@ -120,14 +137,18 @@ class VerboseOptimization(object):
if b:
self.exps = n_exps
print('\r', end=' ')
- print('{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
+ print('{3:} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), "{:>8s}".format(self.timestring), mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
def print_status(self, me, which=None):
self.update()
+ seconds = time.time()-self.start
#sys.stdout.write(" "*len(self.message))
- self.print_out()
+ self.deltat += seconds
+ if self.deltat > .2:
+ self.print_out(seconds)
+ self.deltat = 0
self.iteration += 1
@@ -153,12 +174,12 @@ class VerboseOptimization(object):
if self.verbose:
self.stop = time.time()
self.model.remove_observer(self)
- self.print_out()
+ self.print_out(self.stop - self.start)
if not self.ipython_notebook:
print()
- print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start))
+ print('Runtime: {}'.format("{:>9s}".format(self.timestring)))
print('Optimization status: {0}'.format(self.status))
print()
elif self.clear:
self.hor_align.close()

View file

@@ -217,9 +217,8 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
elif model_type == 'FITC':
m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
m['.*len'] = 3.
if optimize:
- m.pseudo_EM()
+ m.optimize()
if plot:
m.plot()

View file

@@ -355,13 +355,13 @@ def ssgplvm_simulation(optimize=True, verbose=1,
Y = Ylist[0]
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
- m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k)
+ m = SSGPLVM(Y, Q, init="rand", num_inducing=num_inducing, kernel=k, group_spike=True)
m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
- m.likelihood.variance = .1
+ m.likelihood.variance = .01
if optimize:
print("Optimizing model:")
- m.optimize('scg', messages=verbose, max_iters=max_iters,
+ m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05)
if plot:
m.X.plot("SSGPLVM Latent Space 1D")

View file

@@ -45,17 +45,23 @@ class InferenceX(Model):
super(InferenceX, self).__init__(name)
self.likelihood = model.likelihood.copy()
self.kern = model.kern.copy()
- if model.kern.useGPU:
- from ...models import SSGPLVM
- if isinstance(model, SSGPLVM):
- self.kern.GPU_SSRBF(True)
- else:
- self.kern.GPU(True)
+ # if model.kern.useGPU:
+ #     from ...models import SSGPLVM
+ #     if isinstance(model, SSGPLVM):
+ #         self.kern.GPU_SSRBF(True)
+ #     else:
+ #         self.kern.GPU(True)
from copy import deepcopy
self.posterior = deepcopy(model.posterior)
if hasattr(model, 'variational_prior'):
self.uncertain_input = True
- self.variational_prior = model.variational_prior.copy()
+ from ...models.ss_gplvm import IBPPrior
+ from ...models.ss_mrd import IBPPrior_SSMRD
+ if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
+ from ...core.parameterization.variational import SpikeAndSlabPrior
+ self.variational_prior = SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
+ else:
+ self.variational_prior = model.variational_prior.copy()
else:
self.uncertain_input = False
if hasattr(model, 'inducing_inputs'):
@@ -147,9 +153,9 @@ class InferenceX(Model):
from ...core.parameterization.variational import SpikeAndSlabPrior
if isinstance(self.variational_prior, SpikeAndSlabPrior):
# Update Log-likelihood
- KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0])
+ KL_div = self.variational_prior.KL_divergence(self.X)
# update for the KL divergence
- self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0])
+ self.variational_prior.update_gradients_KL(self.X)
else:
# Update Log-likelihood
KL_div = self.variational_prior.KL_divergence(self.X)

View file

@@ -3,6 +3,7 @@ from ...util import linalg
from ...util import choleskies
import numpy as np
from .posterior import Posterior
+ from scipy.linalg.blas import dgemm, dsymm, dtrmm
class SVGP(LatentFunctionInference):
@@ -16,16 +17,13 @@ class SVGP(LatentFunctionInference):
S = np.empty((num_outputs, num_inducing, num_inducing))
- [np.dot(L[:,:,i], L[:,:,i].T, S[i,:,:]) for i in range(num_outputs)]
- S = S.swapaxes(0,2)
+ [np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:]) for i in range(num_outputs)]
#Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
Si = choleskies.multiple_dpotri(L)
- logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
+ logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])
if np.any(np.isinf(Si)):
raise ValueError("Cholesky representation unstable")
- #S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
- #Si, Lnew, _,_ = linalg.pdinv(S)
#compute mean function stuff
if mean_function is not None:
@@ -35,27 +33,31 @@ class SVGP(LatentFunctionInference):
prior_mean_u = np.zeros((num_inducing, num_outputs))
prior_mean_f = np.zeros((num_data, num_outputs))
#compute kernel related stuff
Kmm = kern.K(Z)
- Knm = kern.K(X, Z)
+ Kmn = kern.K(Z, X)
Knn_diag = kern.Kdiag(X)
- Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
+ Lm = linalg.jitchol(Kmm)
+ logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
+ Kmmi, _ = linalg.dpotri(Lm)
#compute the marginal means and variances of q(f)
- A = np.dot(Knm, Kmmi)
- mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
- #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jlk->ilk', A, S),1)
- v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S),1)
+ A, _ = linalg.dpotrs(Lm, Kmn)
+ mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
+ v = np.empty((num_data, num_outputs))
+ for i in range(num_outputs):
+ tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
+ v[:,i] = np.sum(np.square(tmp),0)
+ v += (Knn_diag - np.sum(A*Kmn,0))[:,None]
#compute the KL term
Kmmim = np.dot(Kmmi, q_u_mean)
- KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[:,:,None]*S,0).sum(0) + 0.5*np.sum(q_u_mean*Kmmim,0)
+ KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
KL = KLs.sum()
#gradient of the KL term (assuming zero mean function)
dKL_dm = Kmmim.copy()
- dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
+ dKL_dS = 0.5*(Kmmi[None,:,:] - Si)
- dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
+ dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(0)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
if mean_function is not None:
#adjust KL term for mean function
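The per-output loop above reads the marginal variances off the Cholesky factors directly: diag(Aᵀ S_d A) equals the squared column norms of L_dᵀ A when S_d = L_d L_dᵀ. A self-contained sketch with shapes assumed from the diff (L: (D, M, M), A: (M, N)):

import numpy as np
from scipy.linalg.blas import dtrmm

D, M, N = 2, 4, 6
rng = np.random.default_rng(0)
A = rng.standard_normal((M, N))
L = np.tril(rng.standard_normal((D, M, M)))
v = np.empty((N, D))
for d in range(D):
    tmp = dtrmm(1.0, L[d].T, A, lower=0, trans_a=0)  # L_d^T A
    v[:, d] = np.square(tmp).sum(0)                  # diag(A^T S_d A)
S = np.einsum('dij,dkj->dik', L, L)                  # S_d = L_d L_d^T
assert np.allclose(v[:, 0], np.diag(A.T @ S[0] @ A))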
@@ -80,17 +82,20 @@ class SVGP(LatentFunctionInference):
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale
#derivatives of expected likelihood, assuming zero mean function
- Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
- Admu = A.T.dot(dF_dmu)
- AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
- #tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
- tmp = linalg.ijk_jlk_to_il(AdvA, S).dot(Kmmi)
- dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
+ Adv = A[None,:,:]*dF_dv.T[:,None,:] # As if dF_Dv is diagonal, D, M, N
+ Admu = A.dot(dF_dmu)
+ Adv = np.ascontiguousarray(Adv) # makes for faster operations later...(inc dsymm)
+ AdvA = np.dot(Adv.reshape(-1, num_data),A.T).reshape(num_outputs, num_inducing, num_inducing )
+ tmp = np.sum([np.dot(a,s) for a, s in zip(AdvA, S)],0).dot(Kmmi)
+ dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(0) - tmp - tmp.T
dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
- #tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
- tmp = 2.*(linalg.ij_jlk_to_ilk(Kmmi, S) - np.eye(num_inducing)[:,:,None])
- #dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
- dF_dKmn = linalg.ijk_jlk_to_il(tmp, Adv) + Kmmim.dot(dF_dmu.T)
+ tmp = S.reshape(-1, num_inducing).dot(Kmmi).reshape(num_outputs, num_inducing , num_inducing )
+ tmp = 2.*(tmp - np.eye(num_inducing)[None, :,:])
+ dF_dKmn = Kmmim.dot(dF_dmu.T)
+ for a,b in zip(tmp, Adv):
+ dF_dKmn += np.dot(a.T, b)
dF_dm = Admu
dF_dS = AdvA
@@ -106,11 +111,11 @@ class SVGP(LatentFunctionInference):
log_marginal = F.sum() - KL
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
- dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
+ dL_dchol = 2.*np.array([np.dot(a,b) for a, b in zip(dL_dS, L) ])
dL_dchol = choleskies.triang_to_flat(dL_dchol)
grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
if mean_function is not None:
grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
grad_dict['dL_dmfX'] = dF_dmfX
- return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
+ return Posterior(mean=q_u_mean, cov=S.T, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict

View file

@@ -142,7 +142,7 @@ class opt_lbfgsb(Optimizer):
#a more helpful error message is available in opt_result in the Error case
if opt_result[2]['warnflag']==2:
- self.status = 'Error' + opt_result[2]['task']
+ self.status = 'Error' + str(opt_result[2]['task'])
class opt_simplex(Optimizer):
def __init__(self, *args, **kwargs):

View file

@@ -19,5 +19,5 @@ from ._src.splitKern import SplitKern,DEtime
from ._src.splitKern import DEtime as DiffGenomeKern
- from _src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
+ from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

View file

@@ -6,7 +6,7 @@ import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assessing whether to use cython
- import coregionalize_cython
+ from . import coregionalize_cython
class Coregionalize(Kern):
"""

View file

@@ -105,7 +105,7 @@ class IndependentOutputs(CombinationKernel):
if X2 is None:
# TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out
- print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
+ print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))

View file

@@ -78,7 +78,7 @@ class MLP(Kern):
*((vec1[:, None]+vec2[None, :])*self.weight_variance
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
- def update_gradients_diag(self, X):
+ def update_gradients_diag(self, dL_dKdiag, X):
self._K_diag_computations(X)
self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)

View file

@@ -20,7 +20,6 @@ class RBF(Stationary):
_support_GPU = True
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
- self.psicomp = PSICOMP_RBF()
if self.useGPU:
self.psicomp = PSICOMP_RBF_GPU()
else:
@@ -36,6 +35,7 @@ class RBF(Stationary):
dc = super(RBF, self).__getstate__()
if self.useGPU:
dc['psicomp'] = PSICOMP_RBF()
+ dc['useGPU'] = False
return dc
def __setstate__(self, state):

View file

@@ -13,9 +13,9 @@ from ...util.config import config # for assessing whether to use cython
from ...util.caching import Cache_this
try:
- import stationary_cython
+ from . import stationary_cython
except ImportError:
- print('warning: failed to import cython module: falling back to numpy')
+ print('warning in stationary: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false')

File diff suppressed because it is too large

View file

@@ -1,7 +1,9 @@
#cython: boundscheck=False
+ #cython: nonecheck=False
#cython: wraparound=False
import numpy as np
cimport numpy as np
+ from cython.parallel import prange
ctypedef np.float64_t DTYPE_t
@@ -22,7 +24,18 @@ def grad_X(int N, int D, int M,
cdef double *grad = <double*> _grad.data
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
+ def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
+ cdef int n,d,nd,m
+ for nd in prange(N*D, nogil=True):
+ n = nd/D
+ d = nd%D
+ grad[n,d] = 0.0
+ for m in range(M):
+ grad[n,d] += tmp[n,m]*(X[n,d]-X2[m,d])
- def lengthscale_grads(int N, int M, int Q,
+ def lengthscale_grads_in_c(int N, int M, int Q,
np.ndarray[DTYPE_t, ndim=2] _tmp,
np.ndarray[DTYPE_t, ndim=2] _X,
np.ndarray[DTYPE_t, ndim=2] _X2,
@@ -33,4 +46,14 @@ def lengthscale_grads(int N, int M, int Q,
cdef double *grad = <double*> _grad.data
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
+ def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
+ cdef int q, n, m
+ cdef double gradq, dist
+ for q in range(Q):
+ grad[q] = 0.0
+ for n in range(N):
+ for m in range(M):
+ dist = X[n,q] - X2[m,q]
+ grad[q] += tmp[n,m]*dist*dist
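For reference, a NumPy equivalent of the new lengthscale_grads loop (a sketch, not part of the module's API):

import numpy as np

# grad[q] = sum_{n,m} tmp[n,m] * (X[n,q] - X2[m,q])**2
def lengthscale_grads_numpy(tmp, X, X2):
    diff = X[:, None, :] - X2[None, :, :]       # shape (N, M, Q)
    return np.einsum('nm,nmq->q', tmp, diff * diff)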

View file

@@ -1,19 +1,36 @@
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
- int n,m,d;
double retnd;
- //#pragma omp parallel for private(n,d, retnd, m)
- for(d=0;d<D;d++){
- for(n=0;n<N;n++){
- retnd = 0.0;
- for(m=0;m<M;m++){
- retnd += tmp[n*M+m]*(X[n*D+d]-X2[m*D+d]);
- }
- grad[n*D+d] = retnd;
- }
+ int n,d,nd,m;
+ #pragma omp parallel for private(nd,n,d, retnd, m)
+ for(nd=0;nd<(D*N);nd++){
+ n = nd/D;
+ d = nd%D;
+ retnd = 0.0;
+ for(m=0;m<M;m++){
+ retnd += tmp[n*M+m]*(X[nd]-X2[m*D+d]);
+ }
+ grad[nd] = retnd;
}
} //grad_X
+ void _lengthscale_grads_unsafe(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
+ int n,m,nm,q,nQ,mQ;
+ double dist;
+ #pragma omp parallel for private(n,m,nm,q,nQ,mQ,dist)
+ for(nm=0; nm<(N*M); nm++){
+ n = nm/M;
+ m = nm%M;
+ nQ = n*Q;
+ mQ = m*Q;
+ for(q=0; q<Q; q++){
+ dist = X[nQ+q]-X2[mQ+q];
+ grad[q] += tmp[nm]*dist*dist;
+ }
+ }
+ } //lengthscale_grads
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
int n,m,q;
double gradq, dist;
@@ -33,3 +50,5 @@ for(q=0; q<Q; q++){

View file

@@ -143,7 +143,7 @@ class Likelihood(Parameterized):
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
for yi, mi, vi, yi_m in zipped_values])
- p_ystar = np.array(p_ystar).reshape(-1, 1)
+ p_ystar = np.array(p_ystar).reshape(*y_test.shape)
return np.log(p_ystar)
def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
@@ -173,6 +173,7 @@ class Likelihood(Parameterized):
from scipy.misc import logsumexp
log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
+ log_p_ystar = np.array(log_p_ystar).reshape(*y_test.shape)
return log_p_ystar
@@ -265,8 +266,8 @@ class Likelihood(Parameterized):
stop
if self.size:
- dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
+ dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None], Y_metadata=Y_metadata) # Ntheta x (orig size) x N_{quad_points}
- dF_dtheta = np.dot(dF_dtheta, gh_w)
+ dF_dtheta = np.dot(dF_dtheta, gh_w)/np.sqrt(np.pi)
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
else:
dF_dtheta = None # Not yet implemented
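The added /np.sqrt(np.pi) is the Gauss-Hermite weight normalization: any quantity contracted with gh_w needs it, since E over N(m, v) of g(f) is approximately (1/sqrt(pi)) * sum_i w_i g(m + sqrt(2 v) x_i). A minimal sketch with tanh as a stand-in integrand:

import numpy as np

gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
m, v = 0.3, 0.5
approx = np.dot(np.tanh(m + np.sqrt(2. * v) * gh_x), gh_w) / np.sqrt(np.pi)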

View file

@@ -64,9 +64,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
- if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
- kernel.psicomp.GPU_direct = True
super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
name=name, inference_method=inference_method,
normalizer=normalizer,

View file

@@ -3,7 +3,7 @@
import numpy as np
from .. import kern
- from bayesian_gplvm import BayesianGPLVM
+ from .bayesian_gplvm import BayesianGPLVM
from ..core.parameterization.variational import NormalPosterior, NormalPrior
class DPBayesianGPLVM(BayesianGPLVM):
@@ -15,5 +15,5 @@ class DPBayesianGPLVM(BayesianGPLVM):
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm')
self.X.mean.set_prior(X_prior)
self.link_parameter(X_prior)

View file

@@ -16,6 +16,7 @@ class GPRegression(GP):
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf
:param Norm normalizer: [False]
+ :param noise_var: the noise variance for the Gaussian likelihood, defaults to 1.
Normalize Y with the norm given.
If normalizer is False, no normalization will be done
@@ -25,12 +26,12 @@ class GPRegression(GP):
"""
- def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
+ def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.):
if kernel is None:
kernel = kern.RBF(X.shape[1])
- likelihood = likelihoods.Gaussian()
+ likelihood = likelihoods.Gaussian(variance=noise_var)
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)

View file

@@ -15,7 +15,7 @@ log_2_pi = np.log(2*np.pi)
class GPVariationalGaussianApproximation(Model):
"""
- The Variational Gaussian Approximation revisited implementation for regression
+ The Variational Gaussian Approximation revisited
@article{Opper:2009,
title = {The Variational Gaussian Approximation Revisited},
@@ -25,44 +25,27 @@ class GPVariationalGaussianApproximation(Model):
pages = {786--792},
}
"""
- def __init__(self, X, Y, kernel=None):
+ def __init__(self, X, Y, kernel, likelihood,Y_metadata=None):
Model.__init__(self,'Variational GP classification')
# accept the construction arguments
self.X = ObsAr(X)
- if kernel is None:
- kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
- self.kern = kernel
- self.link_parameter(self.kern)
+ self.Y = Y
self.num_data, self.input_dim = self.X.shape
+ self.Y_metadata = Y_metadata
- self.alpha = Param('alpha', np.zeros(self.num_data))
+ self.kern = kernel
+ self.likelihood = likelihood
+ self.link_parameter(self.kern)
+ self.link_parameter(self.likelihood)
+ self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
self.beta = Param('beta', np.ones(self.num_data))
self.link_parameter(self.alpha)
self.link_parameter(self.beta)
- self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
- self.Ysign = np.where(Y==1, 1, -1).flatten()
def log_likelihood(self):
- """
- Marginal log likelihood evaluation
- """
return self._log_lik
- def likelihood_quadrature(self, m, v):
- """
- Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
- """
- # assume probit for now.
- X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
- p = stats.norm.cdf(X)
- N = stats.norm.pdf(X)
- F = np.log(p).dot(self.gh_w)
- NoverP = N/p
- dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w)
- dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
- return F, dF_dm, dF_dv
def parameters_changed(self):
K = self.kern.K(self.X)
m = K.dot(self.alpha)
@@ -71,13 +54,14 @@ class GPVariationalGaussianApproximation(Model):
A = np.eye(self.num_data) + BKB
Ai, LA, _, Alogdet = pdinv(A)
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior covariance: need the full matrix for gradients
- var = np.diag(Sigma)
+ var = np.diag(Sigma).reshape(-1,1)
- F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
+ F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
+ self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
dF_da = np.dot(K, dF_dm)
SigmaB = Sigma*self.beta
- dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
+ dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
- KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
+ KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
dKL_da = m
A_A2 = Ai - Ai.dot(Ai)
dKL_db = np.diag(np.dot(KB.T, A_A2))
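For reference, the KL term above is the standard identity for the Opper-Archambeau parameterization: a sketch, assuming q(f) = N(m, Sigma) with m = K alpha, B = diag(beta), A = I + BKB and Sigma = (K^{-1} + B^2)^{-1}:

\mathrm{KL}\big(q(f)\,\|\,\mathcal{N}(0,K)\big)
= \tfrac{1}{2}\Big(\log\det A + \operatorname{tr}(A^{-1}) - N + m^{\top}\alpha\Big),
\qquad m = K\alpha,\quad A = I + BKB,

where m^T K^{-1} m = alpha^T m gives the np.sum(m*self.alpha) term in the code.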
@@ -86,12 +70,12 @@ class GPVariationalGaussianApproximation(Model):
self.beta.gradient = dF_db - dKL_db
# K-gradients
- dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
+ dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
tmp = Ai*self.beta[:, None]/self.beta[None, :]
- dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
+ dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
- def predict(self, Xnew):
+ def _raw_predict(self, Xnew):
"""
Predict the function(s) at the new point(s) Xnew.
@@ -105,4 +89,4 @@ class GPVariationalGaussianApproximation(Model):
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(WiKux*Kux, 0)
- return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))
+ return mu, var.reshape(-1,1)

View file

@@ -228,14 +228,14 @@ class HessianChecker(GradientChecker):
if verbose:
if block_indices:
- print "\nBlock {}".format(block_indices)
+ print("\nBlock {}".format(block_indices))
else:
- print "\nAll blocks"
+ print("\nAll blocks")
header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
header_string = map(lambda x: ' | '.join(header), [header])
separator = '-' * len(header_string[0])
- print '\n'.join([header_string[0], separator])
+ print('\n'.join([header_string[0], separator]))
min_r = '%.6f' % float(numpy.min(ratio))
max_r = '%.6f' % float(numpy.max(ratio))
max_d = '%.6f' % float(numpy.max(difference))
@@ -248,7 +248,7 @@ class HessianChecker(GradientChecker):
checked = "\033[91m False \033[0m" checked = "\033[91m False \033[0m"
grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3]) grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
print grad_string print(grad_string)
if plot: if plot:
import pylab as pb import pylab as pb
@@ -348,7 +348,7 @@ class SkewChecker(HessianChecker):
numeric_hess_partial = nd.Jacobian(self._df, vectorized=True) numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
numeric_hess = numeric_hess_partial(x) numeric_hess = numeric_hess_partial(x)
print "Done making numerical hessian" print("Done making numerical hessian")
if analytic_hess.dtype is np.dtype('object'): if analytic_hess.dtype is np.dtype('object'):
#Blockify numeric_hess aswell #Blockify numeric_hess aswell
blocksizes, pagesizes = get_block_shapes_3d(analytic_hess) blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
@@ -365,7 +365,7 @@ class SkewChecker(HessianChecker):
#Unless super_plot is set, just plot the first one #Unless super_plot is set, just plot the first one
p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
if verbose: if verbose:
print "Checking derivative of hessian wrt parameter number {}".format(block_ind) print("Checking derivative of hessian wrt parameter number {}".format(block_ind))
check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p) check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
current_index += current_size current_index += current_size

View file

@@ -5,11 +5,95 @@ import numpy as np
from ..core.sparse_gp_mpi import SparseGP_MPI from ..core.sparse_gp_mpi import SparseGP_MPI
from .. import kern from .. import kern
from ..core.parameterization import Param
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior,VariationalPrior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU
class IBPPosterior(SpikeAndSlabPosterior):
'''
The variational posterior for the IBP prior: a spike-and-slab distribution augmented with tau parameters.
'''
def __init__(self, means, variances, binary_prob, tau=None, sharedX=False, name='latent space'):
"""
binary_prob : the probability of being in the slab part of the distribution.
"""
from ..core.parameterization.transformations import Logexp
super(IBPPosterior, self).__init__(means, variances, binary_prob, group_spike=True, name=name)
self.sharedX = sharedX
if sharedX:
self.mean.fix(warning=False)
self.variance.fix(warning=False)
self.tau = Param("tau_", np.ones((self.gamma_group.shape[0],2)), Logexp())
self.link_parameter(self.tau)
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient, self.tau.gradient = grad
def __getitem__(self, s):
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
import copy
n = self.__new__(self.__class__, self.name)
dc = self.__dict__.copy()
dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s]
dc['binary_prob'] = self.binary_prob[s]
dc['tau'] = self.tau
dc['parameters'] = copy.copy(self.parameters)
n.__dict__.update(dc)
n.parameters[dc['mean']._parent_index_] = dc['mean']
n.parameters[dc['variance']._parent_index_] = dc['variance']
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n.parameters[dc['tau']._parent_index_] = dc['tau']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size - self.tau.size
n.size = n.mean.size + n.variance.size + n.gamma.size+ n.tau.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(IBPPosterior, self).__getitem__(s)
class IBPPrior(VariationalPrior):
def __init__(self, input_dim, alpha =2., name='IBPPrior', **kw):
super(IBPPrior, self).__init__(name=name, **kw)
from ..core.parameterization.transformations import Logexp, __fixed__
self.input_dim = input_dim
self.variance = 1.
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def KL_divergence(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
ad = self.alpha/self.input_dim
from scipy.special import betaln,digamma
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(ad,1.)*self.input_dim \
-betaln(tau[:,0], tau[:,1]).sum() + ((tau[:,0]-gamma-ad)*digamma(tau[:,0])).sum() + \
((tau[:,1]+gamma-2.)*digamma(tau[:,1])).sum() + ((2.+ad-tau[:,0]-tau[:,1])*digamma(tau.sum(axis=1))).sum()
return part1+part2
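Spelled out (our transcription, not from the commit), with $a_d=\alpha/D$, $\psi$ the digamma function, and $B(\cdot,\cdot)$ the Beta function, the returned KL reads

$$\mathrm{KL}=\frac12\sum_{n,d}\gamma_d\Bigl(\log\sigma^2-1+\frac{\mu_{nd}^2+S_{nd}}{\sigma^2}-\log S_{nd}\Bigr)+\sum_d\bigl[\gamma_d\log\gamma_d+(1-\gamma_d)\log(1-\gamma_d)\bigr]+D\log B(a_d,1)$$

$$-\sum_d\log B(\tau_{d1},\tau_{d2})+\sum_d\bigl[(\tau_{d1}-\gamma_d-a_d)\psi(\tau_{d1})+(\tau_{d2}+\gamma_d-2)\psi(\tau_{d2})+(2+a_d-\tau_{d1}-\tau_{d2})\psi(\tau_{d1}+\tau_{d2})\bigr].$$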
def update_gradients_KL(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
variational_posterior.mean.gradient -= gamma*mu/self.variance
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2.
from scipy.special import digamma,polygamma
dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
ad = self.alpha/self.input_dim
common = (ad+2-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1))
variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common)
variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common)
class SSGPLVM(SparseGP_MPI): class SSGPLVM(SparseGP_MPI):
""" """
Spike-and-Slab Gaussian Process Latent Variable Model Spike-and-Slab Gaussian Process Latent Variable Model
@@ -23,9 +107,11 @@ class SSGPLVM(SparseGP_MPI):
""" """
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, **kwargs): Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
self.group_spike = group_spike self.group_spike = group_spike
self.init = init
self.sharedX = sharedX
if X == None: if X == None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
@@ -33,8 +119,6 @@ class SSGPLVM(SparseGP_MPI):
else: else:
fracs = np.ones(input_dim) fracs = np.ones(input_dim)
self.init = init
if X_variance is None: # The variance of the variational approximation (S) if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape) X_variance = np.random.uniform(0,.1,X.shape)
@@ -64,17 +148,16 @@ class SSGPLVM(SparseGP_MPI):
if pi is None: if pi is None:
pi = np.empty((input_dim)) pi = np.empty((input_dim))
pi[:] = 0.5 pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi) # the prior probability of the latent binary variable b
if IBP:
X = SpikeAndSlabPosterior(X, X_variance, gamma) self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior
X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX)
else:
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior
X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX)
super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs) super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
# self.X.unfix()
# self.X.variance.constrain_positive()
self.link_parameter(self.X, index=0) self.link_parameter(self.X, index=0)
if self.group_spike:
[self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in range(self.X.gamma.shape[1])] # Tie columns together
def set_X_gradients(self, X, X_grad): def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form.""" """Set the gradients of the posterior distribution of X in its specific form."""
@@ -84,9 +167,15 @@ class SSGPLVM(SparseGP_MPI):
"""Get the gradients of the posterior distribution of X in its specific form.""" """Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient
def _propogate_X_val(self):
pass
def parameters_changed(self): def parameters_changed(self):
self.X.propogate_val()
if self.sharedX: self._highest_parent_._propogate_X_val()
super(SSGPLVM,self).parameters_changed() super(SSGPLVM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch): if isinstance(self.inference_method, VarDTC_minibatch):
self.X.collate_gradient()
return return
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@@ -95,6 +184,7 @@ class SSGPLVM(SparseGP_MPI):
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)
self.X.collate_gradient()
def input_sensitivity(self): def input_sensitivity(self):
if self.kern.ARD: if self.kern.ARD:

View file

@@ -2,33 +2,256 @@
The Manifold Relevance Determination model with the spike-and-slab prior The Manifold Relevance Determination model with the spike-and-slab prior
""" """
import numpy as np
from ..core import Model from ..core import Model
from .ss_gplvm import SSGPLVM from .ss_gplvm import SSGPLVM
from ..core.parameterization.variational import SpikeAndSlabPrior,NormalPosterior,VariationalPrior
from ..util.misc import param_to_array
from ..kern import RBF
from ..core import Param
from numpy.linalg.linalg import LinAlgError
class SSMRD(Model): class SSMRD(Model):
def __init__(self, Ylist, input_dim, X=None, X_variance=None, def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute',
initx = 'PCA', initz = 'permute', num_inducing=10, Zs=None, kernels=None, inference_methods=None, likelihoods=None, group_spike=True,
num_inducing=10, Z=None, kernel=None, pi=0.5, name='ss_mrd', Ynames=None, mpi_comm=None, IBP=False, alpha=2., taus=None, ):
inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
super(SSMRD, self).__init__(name) super(SSMRD, self).__init__(name)
self.mpi_comm = mpi_comm
self._PROPAGATE_ = False
self.updates = False # initialize X for individual models
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx, X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods, self.X = NormalPosterior(means=X, variances=X_variance)
name='model_'+str(i)) for i,y in enumerate(Ylist)]
self.add_parameters(*(self.models))
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])] if kernels is None:
for i in range(self.models[0].X.mean.shape[0])] kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in range(len(Ylist))]
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])] if Zs is None:
for i in range(self.models[0].X.variance.shape[0])] Zs = [None]* len(Ylist)
if likelihoods is None:
likelihoods = [None]* len(Ylist)
if inference_methods is None:
inference_methods = [None]* len(Ylist)
self.updates = True if IBP:
self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in range(len(Ylist))]
else:
self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in range(len(Ylist))]
self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike,
kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i],
name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)]
self.link_parameters(*(self.models+[self.X]))
def _propogate_X_val(self):
if self._PROPAGATE_: return
for m in self.models:
m.X.mean.values[:] = self.X.mean.values
m.X.variance.values[:] = self.X.variance.values
varp_list = [m.X for m in self.models]
[vp._update_inernal(varp_list) for vp in self.var_priors]
self._PROPAGATE_=True
def _collate_X_gradient(self):
self._PROPAGATE_ = False
self.X.mean.gradient[:] = 0
self.X.variance.gradient[:] = 0
for m in self.models:
self.X.mean.gradient += m.X.mean.gradient
self.X.variance.gradient += m.X.variance.gradient
def parameters_changed(self): def parameters_changed(self):
super(SSMRD, self).parameters_changed() super(SSMRD, self).parameters_changed()
[m.parameters_changed() for m in self.models]
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models]) self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
self._collate_X_gradient()
def log_likelihood(self): def log_likelihood(self):
return self._log_marginal_likelihood return self._log_marginal_likelihood
def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
# Divide latent dimensions
idx = np.empty((input_dim,),dtype=np.int)
residue = (input_dim)%(len(Ylist))
for i in range(len(Ylist)):
if i < residue:
size = input_dim//len(Ylist)+1
idx[i*size:(i+1)*size] = i
else:
size = input_dim//len(Ylist)
idx[i*size+residue:(i+1)*size+residue] = i
if X is None:
if initx == 'PCA_concat':
X = np.empty((Ylist[0].shape[0],input_dim))
fracs = np.empty((input_dim,))
from ..util.initialization import initialize_latent
for i in range(len(Ylist)):
Y = Ylist[i]
dim = (idx==i).sum()
if dim>0:
x, fr = initialize_latent('PCA', dim, Y)
X[:,idx==i] = x
fracs[idx==i] = fr
elif initx=='PCA_joint':
y = np.hstack(Ylist)
from ..util.initialization import initialize_latent
X, fracs = initialize_latent('PCA', input_dim, y)
else:
X = np.random.randn(Ylist[0].shape[0], input_dim)
fracs = np.ones(input_dim)
else:
fracs = np.ones(input_dim)
if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape)
if Gammas is None:
Gammas = []
for x in X:
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
gamma[gamma>1.-1e-9] = 1.-1e-9
gamma[gamma<1e-9] = 1e-9
Gammas.append(gamma)
return X, X_variance, Gammas, fracs
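A worked example (our illustration) of the latent-dimension split computed at the top of _init_X: input_dim = 5 across two views gives residue = 1, so view 0 gets 3 dimensions and view 1 gets 2.

import numpy as np

input_dim, n_views = 5, 2
idx = np.empty(input_dim, dtype=int)
residue = input_dim % n_views
for i in range(n_views):
    if i < residue:
        size = input_dim // n_views + 1
        idx[i*size:(i+1)*size] = i
    else:
        size = input_dim // n_views
        idx[i*size+residue:(i+1)*size+residue] = i
print(idx)  # -> [0 0 0 1 1]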
@Model.optimizer_array.setter
def optimizer_array(self, p):
if self.mpi_comm != None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)
Model.optimizer_array.fset(self,p)
def optimize(self, optimizer=None, start=None, **kwargs):
self._IN_OPTIMIZATION_ = True
if self.mpi_comm==None:
super(SSMRD, self).optimize(optimizer,start,**kwargs)
elif self.mpi_comm.rank==0:
super(SSMRD, self).optimize(optimizer,start,**kwargs)
self.mpi_comm.Bcast(np.int32(-1),root=0)
elif self.mpi_comm.rank>0:
x = self.optimizer_array.copy()
flag = np.empty(1,dtype=np.int32)
while True:
self.mpi_comm.Bcast(flag,root=0)
if flag==1:
try:
self.optimizer_array = x
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures:
raise
self._fail_count += 1
elif flag==-1:
break
else:
self._IN_OPTIMIZATION_ = False
raise Exception("Unrecognizable flag for synchronization!")
self._IN_OPTIMIZATION_ = False
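The optimize override above implements a simple broadcast protocol: rank 0 runs the optimizer and pushes each new parameter vector prefixed by flag 1, then signals completion with flag -1 while the workers spin on the flag. A minimal standalone sketch of the same pattern (hypothetical script, not GPy API):

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
n_params = 5                                        # illustrative size
if comm.rank == 0:
    for step in range(100):                         # stand-in for optimizer iterations
        p = np.random.randn(n_params)               # stand-in for a new parameter vector
        comm.Bcast(np.array(1, dtype=np.int32), root=0)   # flag 1: parameters follow
        comm.Bcast(p, root=0)
        # ... all ranks now evaluate their share of the objective ...
    comm.Bcast(np.array(-1, dtype=np.int32), root=0)      # flag -1: finished
else:
    p = np.empty(n_params)
    flag = np.empty(1, dtype=np.int32)
    while True:
        comm.Bcast(flag, root=0)
        if flag == -1:
            break
        comm.Bcast(p, root=0)                       # receive and apply new parameters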
class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
def __init__(self, nModels, pi=0.5, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
self.nModels = nModels
self._b_prob_all = 0.5
super(SpikeAndSlabPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
def _update_inernal(self, varp_list):
"""Update the internal state by gathering the variational posteriors of all the individual models."""
# _b_prob_all = prod_m (1 - gamma_m); 1 - _b_prob_all is the probability that the binary variable for a latent dimension is on in at least one model.
if self.group_spike:
self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
[np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
else:
self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
[np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
if self.group_spike:
gamma = variational_posterior.binary_prob[0]
else:
gamma = variational_posterior.binary_prob
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
N = variational_posterior.num_data
if self.group_spike:
gamma = variational_posterior.binary_prob.values[0]
else:
gamma = variational_posterior.binary_prob.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
if self.group_spike:
tmp = self._b_prob_all/(1.-gamma)
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
else:
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
if self.learnPi:
raise NotImplementedError('Not Supported!')
class IBPPrior_SSMRD(VariationalPrior):
def __init__(self, nModels, input_dim, alpha =2., tau=None, name='IBPPrior', **kw):
super(IBPPrior_SSMRD, self).__init__(name=name, **kw)
from ..core.parameterization.transformations import Logexp, __fixed__
self.nModels = nModels
self._b_prob_all = 0.5
self.input_dim = input_dim
self.variance = 1.
self.alpha = Param('alpha', alpha, __fixed__)
self.link_parameter(self.alpha)
def _update_inernal(self, varp_list):
"""Update the internal state by gathering the variational posteriors of all the individual models."""
# _b_prob_all = prod_m (1 - gamma_m); 1 - _b_prob_all is the probability that the binary variable for a latent dimension is on in at least one model.
self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
[np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
def KL_divergence(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
part1 = ((1.-self._b_prob_all)* (np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
ad = self.alpha/self.input_dim
from scipy.special import betaln,digamma
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + (betaln(ad,1.)*self.input_dim -betaln(tau[:,0], tau[:,1]).sum())/self.nModels \
+ (( (tau[:,0]-ad)/self.nModels -gamma)*digamma(tau[:,0])).sum() + \
(((tau[:,1]-1.)/self.nModels+gamma-1.)*digamma(tau[:,1])).sum() + (((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*digamma(tau.sum(axis=1))).sum()
return part1+part2
def update_gradients_KL(self, variational_posterior):
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
variational_posterior.mean.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
from scipy.special import digamma,polygamma
tmp = self._b_prob_all/(1.-gamma)
dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
variational_posterior.binary_prob.gradient -= dgamma+tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
ad = self.alpha/self.input_dim
common = ((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*polygamma(1,tau.sum(axis=1))
variational_posterior.tau.gradient[:,0] = -(((tau[:,0]-ad)/self.nModels -gamma)*polygamma(1,tau[:,0])+common)
variational_posterior.tau.gradient[:,1] = -(((tau[:,1]-1.)/self.nModels+gamma-1.)*polygamma(1,tau[:,1])+common)

View file

@@ -11,6 +11,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from scipy import sparse from scipy import sparse
from ...core.parameterization.variational import VariationalPosterior
def plot_fit(model, plot_limits=None, which_data_rows='all', def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], which_data_ycols='all', fixed_inputs=[],
@@ -78,7 +79,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if predict_kw is None: if predict_kw is None:
predict_kw = {} predict_kw = {}
#work out what the inputs are for plotting (1D or 2D) #work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs]) fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims) free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
@@ -219,7 +220,7 @@ def plot_fit_f(model, *args, **kwargs):
kwargs['plot_raw'] = True kwargs['plot_raw'] = True
plot_fit(model,*args, **kwargs) plot_fit(model,*args, **kwargs)
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True): def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_all=False):
""" """
Convenience function for returning back fixed_inputs where the other inputs Convenience function for returning back fixed_inputs where the other inputs
are fixed using fix_routine are fixed using fix_routine
@@ -235,8 +236,13 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True):
f_inputs = [] f_inputs = []
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs(): if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean.values.copy() X = model.X.mean.values.copy()
else: elif isinstance(model.X, VariationalPosterior):
X = model.X.values.copy() X = model.X.values.copy()
else:
if X_all:
X = model.X_all.copy()
else:
X = model.X.copy()
for i in range(X.shape[1]): for i in range(X.shape[1]):
if i not in non_fixed_inputs: if i not in non_fixed_inputs:
if fix_routine == 'mean': if fix_routine == 'mean':

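A hedged usage sketch of the extended helper (`m` is a hypothetical fitted GPy model; the values shown are illustrative):

f_in = fixed_inputs(m, non_fixed_inputs=[0], fix_routine='median')
# f_in is a list of (input index, value) pairs, e.g. [(1, 0.12), (2, -0.53)]
m.plot(fixed_inputs=f_in)  # fixed_inputs is the keyword accepted by plot_fit above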
View file

@@ -408,12 +408,13 @@ class mocap_data_show_vpython(vpython_show):
class mocap_data_show(matplotlib_show): class mocap_data_show(matplotlib_show):
"""Base class for visualizing motion capture data.""" """Base class for visualizing motion capture data."""
def __init__(self, vals, axes=None, connect=None): def __init__(self, vals, axes=None, connect=None, color='b'):
if axes==None: if axes==None:
fig = plt.figure() fig = plt.figure()
axes = fig.add_subplot(111, projection='3d', aspect='equal') axes = fig.add_subplot(111, projection='3d', aspect='equal')
matplotlib_show.__init__(self, vals, axes) matplotlib_show.__init__(self, vals, axes)
self.color = color
self.connect = connect self.connect = connect
self.process_values() self.process_values()
self.initialize_axes() self.initialize_axes()
@@ -423,7 +424,7 @@ class mocap_data_show(matplotlib_show):
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()
def draw_vertices(self): def draw_vertices(self):
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2]) self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2], color=self.color)
def draw_edges(self): def draw_edges(self):
self.line_handle = [] self.line_handle = []
@@ -442,7 +443,7 @@ class mocap_data_show(matplotlib_show):
z.append(self.vals[i, 2]) z.append(self.vals[i, 2])
z.append(self.vals[j, 2]) z.append(self.vals[j, 2])
z.append(np.NaN) z.append(np.NaN)
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-') self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), '-', color=self.color)
def modify(self, vals): def modify(self, vals):
self.vals = vals.copy() self.vals = vals.copy()
@@ -450,7 +451,7 @@ class mocap_data_show(matplotlib_show):
self.initialize_axes_modify() self.initialize_axes_modify()
self.draw_vertices() self.draw_vertices()
self.initialize_axes() self.initialize_axes()
self.finalize_axes_modify() #self.finalize_axes_modify()
self.draw_edges() self.draw_edges()
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()
@@ -469,12 +470,20 @@ class mocap_data_show(matplotlib_show):
self.line_handle[0].remove() self.line_handle[0].remove()
def finalize_axes(self): def finalize_axes(self):
self.axes.set_xlim(self.x_lim) # self.axes.set_xlim(self.x_lim)
self.axes.set_ylim(self.y_lim) # self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim) # self.axes.set_zlim(self.z_lim)
self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.]) # self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
# self.axes.set_aspect('equal') extents = np.array([getattr(self.axes, 'get_{}lim'.format(dim))() for dim in 'xyz'])
sz = extents[:,1] - extents[:,0]
centers = np.mean(extents, axis=1)
maxsize = max(abs(sz))
r = maxsize/2
for ctr, dim in zip(centers, 'xyz'):
getattr(self.axes, 'set_{}lim'.format(dim))(ctr - r, ctr + r)
# self.axes.set_aspect('equal')
# self.axes.autoscale(enable=False) # self.axes.autoscale(enable=False)
def finalize_axes_modify(self): def finalize_axes_modify(self):
@@ -494,7 +503,7 @@ class stick_show(mocap_data_show):
class skeleton_show(mocap_data_show): class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles.""" """data_show class for visualizing motion capture data encoded as a skeleton with angles."""
def __init__(self, vals, skel, axes=None, padding=0): def __init__(self, vals, skel, axes=None, padding=0, color='b'):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles. """data_show class for visualizing motion capture data encoded as a skeleton with angles.
:param vals: set of modeled angles to use for printing in the axis when it's first created. :param vals: set of modeled angles to use for printing in the axis when it's first created.
:type vals: np.array :type vals: np.array
@@ -506,7 +515,7 @@ class skeleton_show(mocap_data_show):
self.skel = skel self.skel = skel
self.padding = padding self.padding = padding
connect = skel.connection_matrix() connect = skel.connection_matrix()
mocap_data_show.__init__(self, vals, axes=axes, connect=connect) mocap_data_show.__init__(self, vals, axes=axes, connect=connect, color=color)
def process_values(self): def process_values(self):
"""Takes a set of angles and converts them to the x,y,z coordinates in the internal representation of the class, ready for plotting. """Takes a set of angles and converts them to the x,y,z coordinates in the internal representation of the class, ready for plotting.

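A hedged usage sketch of the new color keyword threaded through above (`angles` and `skel` are assumed to come from the usual GPy.util mocap loading; shapes are illustrative):

viz = skeleton_show(angles[0:1, :], skel, color='r')  # draw the skeleton in red
viz.modify(angles[1:2, :])                            # update the pose in place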
View file

@@ -9,8 +9,8 @@ These tests make sure that the pure Python and Cython codes work the same
class CythonTestChols(np.testing.TestCase): class CythonTestChols(np.testing.TestCase):
def setUp(self): def setUp(self):
self.flat = np.random.randn(45, 5) self.flat = np.random.randn(45,5)
self.triang = np.dstack([np.eye(20)[:,:,None] for i in range(3)]) self.triang = np.array([np.eye(20) for i in range(3)])
def test_flat_to_triang(self): def test_flat_to_triang(self):
L1 = choleskies._flat_to_triang_pure(self.flat) L1 = choleskies._flat_to_triang_pure(self.flat)
L2 = choleskies._flat_to_triang_cython(self.flat) L2 = choleskies._flat_to_triang_cython(self.flat)

View file

@@ -49,7 +49,7 @@ class LinkFunctionTests(np.testing.TestCase):
self.assertTrue(grad3.checkgrad(verbose=True)) self.assertTrue(grad3.checkgrad(verbose=True))
if test_lim: if test_lim:
print "Testing limits" print("Testing limits")
#Remove some otherwise we are too close to the limit for gradcheck to work effectively #Remove some otherwise we are too close to the limit for gradcheck to work effectively
lim_of_inf = lim_of_inf - 1e-4 lim_of_inf = lim_of_inf - 1e-4
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf) grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)

View file

@@ -100,10 +100,10 @@ def block_dot(A, B, diagonal=False):
Dshape = D.shape Dshape = D.shape
if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\ if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]): or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
print "Broadcasting, C: {} D:{}".format(C.shape, D.shape) print("Broadcasting, C: {} D:{}".format(C.shape, D.shape))
return C*D return C*D
else: else:
print "Dotting, C: {} C:{}".format(C.shape, D.shape) print("Dotting, C: {} C:{}".format(C.shape, D.shape))
return np.dot(C,D) return np.dot(C,D)
dot = np.vectorize(f, otypes = [np.object]) dot = np.vectorize(f, otypes = [np.object])
return dot(A,B) return dot(A,B)

View file

@@ -5,7 +5,7 @@ import numpy as np
from . import linalg from . import linalg
from .config import config from .config import config
import choleskies_cython from . import choleskies_cython
def safe_root(N): def safe_root(N):
i = np.sqrt(N) i = np.sqrt(N)
@@ -17,12 +17,12 @@ def safe_root(N):
def _flat_to_triang_pure(flat_mat): def _flat_to_triang_pure(flat_mat):
N, D = flat_mat.shape N, D = flat_mat.shape
M = (-1 + safe_root(8*N+1))//2 M = (-1 + safe_root(8*N+1))//2
ret = np.zeros((M, M, D)) ret = np.zeros((D, M, M))
count = 0 for d in range(D):
for m in range(M): count = 0
for mm in range(m+1): for m in range(M):
for d in range(D): for mm in range(m+1):
ret.flat[d + m*D*M + mm*D] = flat_mat.flat[count]; ret[d,m, mm] = flat_mat[count, d];
count = count+1 count = count+1
return ret return ret
@@ -33,15 +33,15 @@ def _flat_to_triang_cython(flat_mat):
def _triang_to_flat_pure(L): def _triang_to_flat_pure(L):
M, _, D = L.shape D, _, M = L.shape
N = M*(M+1)//2 N = M*(M+1)//2
flat = np.empty((N, D)) flat = np.empty((N, D))
count = 0; for d in range(D):
for m in range(M): count = 0;
for mm in range(m+1): for m in range(M):
for d in range(D): for mm in range(m+1):
flat.flat[count] = L.flat[d + m*D*M + mm*D]; flat[count,d] = L[d, m, mm]
count = count +1 count = count +1
return flat return flat
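A round-trip sanity check of the new (D, M, M) layout introduced by these two functions (a sketch; it assumes the pure-Python helpers above are importable from GPy.util.choleskies):

import numpy as np
from GPy.util.choleskies import _flat_to_triang_pure, _triang_to_flat_pure

M, D = 4, 3
N = M * (M + 1) // 2                 # entries in one lower triangle
flat = np.random.randn(N, D)
tri = _flat_to_triang_pure(flat)     # shape (D, M, M), one slice per column of flat
assert tri.shape == (D, M, M)
assert np.allclose(_triang_to_flat_pure(tri), flat)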
@@ -59,12 +59,12 @@ def _backprop_gradient_pure(dL, L):
""" """
dL_dK = np.tril(dL).copy() dL_dK = np.tril(dL).copy()
N = L.shape[0] N = L.shape[0]
for k in xrange(N - 1, -1, -1): for k in range(N - 1, -1, -1):
for j in xrange(k + 1, N): for j in range(k + 1, N):
for i in xrange(j, N): for i in range(j, N):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k] dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
dL_dK[j, k] -= dL_dK[i, j] * L[i, k] dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
for j in xrange(k + 1, N): for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k] dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k] dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2 * L[k, k]) dL_dK[k, k] /= (2 * L[k, k])
@@ -74,7 +74,7 @@ def triang_to_cov(L):
return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])]) return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])])
def multiple_dpotri(Ls): def multiple_dpotri(Ls):
return np.dstack([linalg.dpotri(np.asfortranarray(Ls[:,:,i]), lower=1)[0] for i in range(Ls.shape[-1])]) return np.array([linalg.dpotri(np.asfortranarray(Ls[i]), lower=1)[0] for i in range(Ls.shape[0])])
def indexes_to_fix_for_low_rank(rank, size): def indexes_to_fix_for_low_rank(rank, size):
""" """
@@ -100,7 +100,7 @@ def indexes_to_fix_for_low_rank(rank, size):
if config.getboolean('cython', 'working'): if config.getboolean('cython', 'working'):
triang_to_flat = _triang_to_flat_cython triang_to_flat = _triang_to_flat_cython
flat_to_triang = _flat_to_triang_cython flat_to_triang = _flat_to_triang_cython
backprop_gradient = choleskies_cython.backprop_gradient backprop_gradient = choleskies_cython.backprop_gradient_par_c
else: else:
backprop_gradient = _backprop_gradient_pure backprop_gradient = _backprop_gradient_pure
triang_to_flat = _triang_to_flat_pure triang_to_flat = _triang_to_flat_pure

File diff suppressed because it is too large Load diff

View file

@@ -5,31 +5,32 @@
# Copyright James Hensman and Alan Saul 2015 # Copyright James Hensman and Alan Saul 2015
import numpy as np import numpy as np
from cython.parallel import prange, parallel
cimport numpy as np cimport numpy as np
def flat_to_triang(np.ndarray[double, ndim=2] flat, int M): def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
"""take a matrix N x D and return a M X M x D array where """take a matrix N x D and return a D X M x M array where
N = M(M+1)/2 N = M(M+1)/2
the lower triangular portion of the d'th slice of the result is filled by the d'th column of flat. the lower triangular portion of the d'th slice of the result is filled by the d'th column of flat.
""" """
cdef int N = flat.shape[0]
cdef int D = flat.shape[1] cdef int D = flat.shape[1]
cdef int N = flat.shape[0]
cdef int count = 0 cdef int count = 0
cdef np.ndarray[double, ndim=3] ret = np.zeros((M, M, D)) cdef np.ndarray[double, ndim=3] ret = np.zeros((D, M, M))
cdef int d, m, mm cdef int d, m, mm
for d in range(D): for d in range(D):
count = 0 count = 0
for m in range(M): for m in range(M):
for mm in range(m+1): for mm in range(m+1):
ret[m, mm, d] = flat[count,d] ret[d, m, mm] = flat[count,d]
count += 1 count += 1
return ret return ret
def triang_to_flat(np.ndarray[double, ndim=3] L): def triang_to_flat(np.ndarray[double, ndim=3] L):
cdef int M = L.shape[0] cdef int D = L.shape[0]
cdef int D = L.shape[2] cdef int M = L.shape[1]
cdef int N = M*(M+1)/2 cdef int N = M*(M+1)/2
cdef int count = 0 cdef int count = 0
cdef np.ndarray[double, ndim=2] flat = np.empty((N, D)) cdef np.ndarray[double, ndim=2] flat = np.empty((N, D))
@@ -38,7 +39,7 @@ def triang_to_flat(np.ndarray[double, ndim=3] L):
count = 0 count = 0
for m in range(M): for m in range(M):
for mm in range(m+1): for mm in range(m+1):
flat[count,d] = L[m, mm, d] flat[count,d] = L[d, m, mm]
count += 1 count += 1
return flat return flat
@@ -57,3 +58,50 @@ def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k] dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k]) dL_dK[k, k] /= (2. * L[k, k])
return dL_dK return dL_dK
def backprop_gradient_par(double[:,:] dL, double[:,:] L):
cdef double[:,:] dL_dK = np.tril(dL).copy()
cdef int N = L.shape[0]
cdef int k, j, i
for k in range(N - 1, -1, -1):
with nogil, parallel():
for i in prange(k + 1, N):
for j in range(k+1, i+1):
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
for j in range(i, N):
dL_dK[i, k] -= dL_dK[j, i] * L[j, k]
for j in range(k + 1, N):
dL_dK[j, k] /= L[k, k]
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
dL_dK[k, k] /= (2. * L[k, k])
return dL_dK
#here's a pure C version...
cdef extern from "cholesky_backprop.h" nogil:
void chol_backprop(int N, double* dL, double* L)
def backprop_gradient_par_c(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
cdef int N = L.shape[0]
with nogil:
chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
return dL_dK
cdef extern from "cholesky_backprop.h" nogil:
void old_chol_backprop(int N, double* dL, double* L)
def backprop_gradient_par_c_old(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
cdef int N = L.shape[0]
with nogil:
old_chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
return dL_dK
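A hedged consistency check between the pure-Python recursion and the C-backed wrapper above (it assumes the compiled extension is importable, as choleskies.py does):

import numpy as np
from GPy.util import choleskies, choleskies_cython

A = np.random.randn(6, 6)
K = A.dot(A.T) + 6 * np.eye(6)          # a well-conditioned PSD matrix
L = np.linalg.cholesky(K)
dL = np.random.randn(6, 6)              # an arbitrary df/dL
g_pure = choleskies._backprop_gradient_pure(dL, L)
g_c = choleskies_cython.backprop_gradient_par_c(dL, L)
assert np.allclose(g_pure, g_c)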

View file

@@ -0,0 +1,51 @@
#include <cblas.h>
void chol_backprop(int N, double* dL, double* L){
//at the input to this fn, dL is df_dL. after this fn is complete, dL is df_dK
int i,k;
dL[N*N - 1] /= (2. * L[N*N - 1]);
for(k=N-2;k>(-1);k--){
cblas_dsymv(CblasRowMajor, CblasLower,
N-k-1, -1,
&dL[(N*(k+1) + k+1)],N,
&L[k*N+k+1],1,
1, &dL[N*(k+1)+k], N);
for(i=0;i<(N-k-1); i++){
dL[N*(k+1+i)+k] -= dL[N*(k+1)+k+i*(N+1)+1] * L[k*N+k+1+i];
}
cblas_dscal(N-k-1, 1.0/L[k*N+k], &dL[(k+1)*N+k], N);
dL[k*N + k] -= cblas_ddot(N-k-1, &dL[(k+1)*N+k], N, &L[k*N+k+1], 1);
dL[k*N + k] /= (2.0 * L[k*N + k]);
}
}
double mydot(int n, double* a, int stride_a, double* b, int stride_b){
double ret = 0;
for(int i=0; i<n; i++){
ret += a[i*stride_a]*b[i*stride_b];
}
return ret;
}
void old_chol_backprop(int N, double* dL, double* U){
//at the input to this fn, dL is df_dL. after this fn is complete, dL is df_dK
int iN, kN,i,j,k;
dL[N*N-1] /= (2. * U[N*N-1]);
for(k=N-2;k>(-1);k--){
kN = k*N;
#pragma omp parallel for private(i,iN)
for(i=k+1; i<N; i++){
iN = i*N;
dL[iN+k] -= mydot(i-k, &dL[iN+k+1], 1, &U[kN+k+1], 1);
dL[iN+k] -= mydot(N-i, &dL[iN+i], N, &U[kN+i], 1);
}
for(i=(k + 1); i<N; i++){
iN = i*N;
dL[iN + k] /= U[kN + k];
dL[kN + k] -= U[kN + i] * dL[iN + k];
}
dL[kN + k] /= (2. * U[kN + k]);
}
}

View file

@@ -0,0 +1,5 @@
#include <cblas.h>
void dsymv(int N, double*A, double*b, double*y);
double mydot(int n, double* a, int stride_a, double* b, int stride_b);
void chol_backprop(int N, double* dL, double* L);

View file

@@ -15,7 +15,7 @@ import warnings
import os import os
from .config import config from .config import config
import logging import logging
import linalg_cython from . import linalg_cython
_scipyversion = np.float64((scipy.__version__).split('.')[:2]) _scipyversion = np.float64((scipy.__version__).split('.')[:2])

View file

@@ -13,7 +13,6 @@ Continuous integration status: ![CI status](https://travis-ci.org/SheffieldML/GP
### Python 3 Compatibility ### Python 3 Compatibility
Work is underway to make GPy run on Python 3. Work is underway to make GPy run on Python 3.
* Python 2.x compatibility is currently broken in this fork
* All tests in the testsuite now run on Python3. * All tests in the testsuite now run on Python3.
To see this for yourself, in Ubuntu 14.04, you can do To see this for yourself, in Ubuntu 14.04, you can do
@@ -21,12 +20,17 @@ To see this for yourself, in Ubuntu 14.04, you can do
git clone https://github.com/mikecroucher/GPy.git git clone https://github.com/mikecroucher/GPy.git
cd GPy cd GPy
git checkout devel git checkout devel
python3 setup.py build_ext --inplace
nosetests3 GPy/testing nosetests3 GPy/testing
nosetests3 is Ubuntu's way of referring to the Python 3 version of nosetests. You install it with nosetests3 is Ubuntu's way of referring to the Python 3 version of nosetests. You install it with
sudo apt-get install python3-nose sudo apt-get install python3-nose
The command `python3 setup.py build_ext --inplace` builds the Cython extensions. If it doesn't work, you may need to install this:
sudo apt-get install python3-dev
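Once built, a quick smoke test (our suggestion, not from the original docs) is

python3 -c "import GPy"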
* Test coverage is less than 100% so it is expected that there is still more work to be done. We need more tests and examples to try out. * Test coverage is less than 100% so it is expected that there is still more work to be done. We need more tests and examples to try out.
* All weave functions not covered by the test suite are *simply commented out*. Can add equivalents later as test functions become available * All weave functions not covered by the test suite are *simply commented out*. Can add equivalents later as test functions become available
* A set of benchmarks would be useful! * A set of benchmarks would be useful!

View file

@@ -20,9 +20,10 @@ ext_mods = [Extension(name='GPy.kern._src.stationary_cython',
extra_compile_args=compile_flags, extra_compile_args=compile_flags,
extra_link_args = ['-lgomp']), extra_link_args = ['-lgomp']),
Extension(name='GPy.util.choleskies_cython', Extension(name='GPy.util.choleskies_cython',
sources=['GPy/util/choleskies_cython.c'], sources=['GPy/util/choleskies_cython.c', 'GPy/util/cholesky_backprop.c'],
include_dirs=[np.get_include()], include_dirs=[np.get_include()],
extra_compile_args=compile_flags), extra_link_args = ['-lgomp', '-lblas'],
extra_compile_args=compile_flags+['-std=c99']),
Extension(name='GPy.util.linalg_cython', Extension(name='GPy.util.linalg_cython',
sources=['GPy/util/linalg_cython.c'], sources=['GPy/util/linalg_cython.c'],
include_dirs=[np.get_include()], include_dirs=[np.get_include()],