mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-10 12:32:40 +02:00
Change crescent data to optimize with .optimize()
This commit is contained in:
commit
7c8176e5fd
47 changed files with 30168 additions and 1116 deletions
|
|
@ -89,7 +89,6 @@ class GP(Model):
|
||||||
assert mean_function.output_dim == self.output_dim
|
assert mean_function.output_dim == self.output_dim
|
||||||
self.link_parameter(mean_function)
|
self.link_parameter(mean_function)
|
||||||
|
|
||||||
|
|
||||||
#find a sensible inference method
|
#find a sensible inference method
|
||||||
logger.info("initializing inference method")
|
logger.info("initializing inference method")
|
||||||
if inference_method is None:
|
if inference_method is None:
|
||||||
|
|
@ -208,6 +207,7 @@ class GP(Model):
|
||||||
Kxx = kern.Kdiag(_Xnew)
|
Kxx = kern.Kdiag(_Xnew)
|
||||||
var = Kxx - np.sum(WiKx*Kx, 0)
|
var = Kxx - np.sum(WiKx*Kx, 0)
|
||||||
var = var.reshape(-1, 1)
|
var = var.reshape(-1, 1)
|
||||||
|
var[var<0.] = 0.
|
||||||
|
|
||||||
#force mu to be a column vector
|
#force mu to be a column vector
|
||||||
if len(mu.shape)==1: mu = mu[:,None]
|
if len(mu.shape)==1: mu = mu[:,None]
|
||||||
|
|
@ -229,13 +229,14 @@ class GP(Model):
|
||||||
:param Y_metadata: metadata about the predicting point to pass to the likelihood
|
:param Y_metadata: metadata about the predicting point to pass to the likelihood
|
||||||
:param kern: The kernel to use for prediction (defaults to the model
|
:param kern: The kernel to use for prediction (defaults to the model
|
||||||
kern). this is useful for examining e.g. subprocesses.
|
kern). this is useful for examining e.g. subprocesses.
|
||||||
:returns: (mean, var, lower_upper):
|
:returns: (mean, var):
|
||||||
mean: posterior mean, a Numpy array, Nnew x self.input_dim
|
mean: posterior mean, a Numpy array, Nnew x self.input_dim
|
||||||
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
|
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
|
||||||
lower_upper: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
|
|
||||||
|
|
||||||
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
|
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
|
||||||
This is to allow for different normalizations of the output dimensions.
|
This is to allow for different normalizations of the output dimensions.
|
||||||
|
|
||||||
|
Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:"~GPy.core.gp.GP.predict_quantiles".
|
||||||
"""
|
"""
|
||||||
#predict the latent function values
|
#predict the latent function values
|
||||||
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
|
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
|
||||||
|
|
@ -255,7 +256,7 @@ class GP(Model):
|
||||||
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
|
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
|
||||||
:type quantiles: tuple
|
:type quantiles: tuple
|
||||||
:returns: list of quantiles for each X and predictive quantiles for interval combination
|
:returns: list of quantiles for each X and predictive quantiles for interval combination
|
||||||
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
|
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
|
||||||
"""
|
"""
|
||||||
m, v = self._raw_predict(X, full_cov=False)
|
m, v = self._raw_predict(X, full_cov=False)
|
||||||
if self.normalizer is not None:
|
if self.normalizer is not None:
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,7 @@ class Model(Parameterized):
|
||||||
jobs = []
|
jobs = []
|
||||||
pool = mp.Pool(processes=num_processes)
|
pool = mp.Pool(processes=num_processes)
|
||||||
for i in range(num_restarts):
|
for i in range(num_restarts):
|
||||||
self.randomize()
|
if i>0: self.randomize()
|
||||||
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
|
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
|
||||||
jobs.append(job)
|
jobs.append(job)
|
||||||
|
|
||||||
|
|
@ -90,7 +90,7 @@ class Model(Parameterized):
|
||||||
for i in range(num_restarts):
|
for i in range(num_restarts):
|
||||||
try:
|
try:
|
||||||
if not parallel:
|
if not parallel:
|
||||||
self.randomize()
|
if i>0: self.randomize()
|
||||||
self.optimize(**kwargs)
|
self.optimize(**kwargs)
|
||||||
else:
|
else:
|
||||||
self.optimization_runs.append(jobs[i].get())
|
self.optimization_runs.append(jobs[i].get())
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import numpy
|
||||||
from numpy.lib.function_base import vectorize
|
from numpy.lib.function_base import vectorize
|
||||||
from .lists_and_dicts import IntArrayDict
|
from .lists_and_dicts import IntArrayDict
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
from transformations import Transformation
|
from .transformations import Transformation
|
||||||
|
|
||||||
def extract_properties_to_index(index, props):
|
def extract_properties_to_index(index, props):
|
||||||
prop_index = dict()
|
prop_index = dict()
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,11 @@ class Param(Parameterizable, ObsAr):
|
||||||
Fixing parameters will fix them to the value they are right now. If you change
|
Fixing parameters will fix them to the value they are right now. If you change
|
||||||
the fixed value, it will be fixed to the new value!
|
the fixed value, it will be fixed to the new value!
|
||||||
|
|
||||||
|
Important Note:
|
||||||
|
Multilevel indexing (e.g. self[:2][1:]) is not supported and might lead to unexpected behaviour.
|
||||||
|
Try to index in one go, using boolean indexing or the numpy builtin
|
||||||
|
np.index function.
|
||||||
|
|
||||||
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
|
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -430,23 +430,38 @@ class Indexable(Nameable, Updateable):
|
||||||
|
|
||||||
def log_prior(self):
|
def log_prior(self):
|
||||||
"""evaluate the prior"""
|
"""evaluate the prior"""
|
||||||
if self.priors.size > 0:
|
if self.priors.size == 0:
|
||||||
x = self.param_array
|
return 0.
|
||||||
#py3 fix
|
x = self.param_array
|
||||||
#return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
|
#evaluate the prior log densities
|
||||||
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
|
log_p = reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
|
||||||
return 0.
|
|
||||||
|
#account for the transformation by evaluating the log Jacobian (where things are transformed)
|
||||||
|
log_j = 0.
|
||||||
|
priored_indexes = np.hstack([i for p, i in self.priors.items()])
|
||||||
|
for c,j in self.constraints.items():
|
||||||
|
if not isinstance(c, Transformation):continue
|
||||||
|
for jj in j:
|
||||||
|
if jj in priored_indexes:
|
||||||
|
log_j += c.log_jacobian(x[jj])
|
||||||
|
return log_p + log_j
|
||||||
|
|
||||||
def _log_prior_gradients(self):
|
def _log_prior_gradients(self):
|
||||||
"""evaluate the gradients of the priors"""
|
"""evaluate the gradients of the priors"""
|
||||||
if self.priors.size > 0:
|
if self.priors.size == 0:
|
||||||
x = self.param_array
|
return 0.
|
||||||
ret = np.zeros(x.size)
|
x = self.param_array
|
||||||
#py3 fix
|
ret = np.zeros(x.size)
|
||||||
#[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
|
#compute derivate of prior density
|
||||||
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
|
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
|
||||||
return ret
|
#add in jacobian derivatives if transformed
|
||||||
return 0.
|
priored_indexes = np.hstack([i for p, i in self.priors.items()])
|
||||||
|
for c,j in self.constraints.items():
|
||||||
|
if not isinstance(c, Transformation):continue
|
||||||
|
for jj in j:
|
||||||
|
if jj in priored_indexes:
|
||||||
|
ret[jj] += c.log_jacobian_grad(x[jj])
|
||||||
|
return ret
|
||||||
|
|
||||||
#===========================================================================
|
#===========================================================================
|
||||||
# Tie parameters together
|
# Tie parameters together
|
||||||
|
|
|
||||||
|
|
@ -6,10 +6,10 @@ import numpy; np = numpy
|
||||||
import itertools
|
import itertools
|
||||||
from re import compile, _pattern_type
|
from re import compile, _pattern_type
|
||||||
from .param import ParamConcatenation
|
from .param import ParamConcatenation
|
||||||
from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
|
from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from index_operations import ParameterIndexOperationsView
|
from .index_operations import ParameterIndexOperationsView
|
||||||
logger = logging.getLogger("parameters changed meta")
|
logger = logging.getLogger("parameters changed meta")
|
||||||
|
|
||||||
class ParametersChangedMeta(type):
|
class ParametersChangedMeta(type):
|
||||||
|
|
|
||||||
|
|
@ -758,12 +758,12 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
||||||
self.sigma2 = sigma2
|
self.sigma2 = sigma2
|
||||||
# self.x = x
|
# self.x = x
|
||||||
self.lbl = lbl
|
self.lbl = lbl
|
||||||
self.lamda = lamda
|
self.lamda = lamda
|
||||||
self.classnum = lbl.shape[1]
|
self.classnum = lbl.shape[1]
|
||||||
self.datanum = lbl.shape[0]
|
self.datanum = lbl.shape[0]
|
||||||
self.x_shape = x_shape
|
self.x_shape = x_shape
|
||||||
self.dim = x_shape[1]
|
self.dim = x_shape[1]
|
||||||
self.lamda = Param('lamda', np.diag(lamda))
|
self.lamda = Param('lamda', np.diag(lamda))
|
||||||
self.link_parameter(self.lamda)
|
self.link_parameter(self.lamda)
|
||||||
|
|
||||||
def get_class_label(self, y):
|
def get_class_label(self, y):
|
||||||
|
|
@ -789,7 +789,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
||||||
M_i = np.zeros((self.classnum, self.dim))
|
M_i = np.zeros((self.classnum, self.dim))
|
||||||
for i in cls:
|
for i in cls:
|
||||||
# Mean of each class
|
# Mean of each class
|
||||||
class_i = cls[i]
|
class_i = cls[i]
|
||||||
M_i[i] = np.mean(class_i, axis=0)
|
M_i[i] = np.mean(class_i, axis=0)
|
||||||
return M_i
|
return M_i
|
||||||
|
|
||||||
|
|
@ -899,8 +899,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
||||||
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||||
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
|
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
|
||||||
|
|
||||||
xprime = x.dot(np.diagflat(self.lamda))
|
xprime = x.dot(np.diagflat(self.lamda))
|
||||||
x = xprime
|
x = xprime
|
||||||
# print x
|
# print x
|
||||||
cls = self.compute_cls(x)
|
cls = self.compute_cls(x)
|
||||||
M_0 = np.mean(x, axis=0)
|
M_0 = np.mean(x, axis=0)
|
||||||
|
|
@ -910,14 +910,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
||||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
||||||
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
|
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
|
||||||
|
|
||||||
# This function calculates derivative of the log of prior function
|
# This function calculates derivative of the log of prior function
|
||||||
def lnpdf_grad(self, x):
|
def lnpdf_grad(self, x):
|
||||||
x = x.reshape(self.x_shape)
|
x = x.reshape(self.x_shape)
|
||||||
xprime = x.dot(np.diagflat(self.lamda))
|
xprime = x.dot(np.diagflat(self.lamda))
|
||||||
x = xprime
|
x = xprime
|
||||||
# print x
|
# print x
|
||||||
cls = self.compute_cls(x)
|
cls = self.compute_cls(x)
|
||||||
M_0 = np.mean(x, axis=0)
|
M_0 = np.mean(x, axis=0)
|
||||||
|
|
@ -934,7 +934,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
||||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
||||||
Sb_inv_N_trans = np.transpose(Sb_inv_N)
|
Sb_inv_N_trans = np.transpose(Sb_inv_N)
|
||||||
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
|
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
|
||||||
Sw_trans = np.transpose(Sw)
|
Sw_trans = np.transpose(Sw)
|
||||||
|
|
@ -951,14 +951,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
||||||
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
||||||
DPxprim_Dx = DPxprim_Dx.T
|
DPxprim_Dx = DPxprim_Dx.T
|
||||||
|
|
||||||
DPxprim_Dlamda = DPx_Dx.dot(x)
|
DPxprim_Dlamda = DPx_Dx.dot(x)
|
||||||
|
|
||||||
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
||||||
DPxprim_Dlamda = DPxprim_Dlamda.T
|
DPxprim_Dlamda = DPxprim_Dlamda.T
|
||||||
|
|
||||||
self.lamda.gradient = np.diag(DPxprim_Dlamda)
|
self.lamda.gradient = np.diag(DPxprim_Dlamda)
|
||||||
# print DPxprim_Dx
|
# print DPxprim_Dx
|
||||||
return DPxprim_Dx
|
return DPxprim_Dx
|
||||||
|
|
||||||
|
|
||||||
# def frb(self, x):
|
# def frb(self, x):
|
||||||
|
|
@ -1139,8 +1139,8 @@ class DGPLVM_T(Prior):
|
||||||
# This function calculates log of our prior
|
# This function calculates log of our prior
|
||||||
def lnpdf(self, x):
|
def lnpdf(self, x):
|
||||||
x = x.reshape(self.x_shape)
|
x = x.reshape(self.x_shape)
|
||||||
xprim = x.dot(self.vec)
|
xprim = x.dot(self.vec)
|
||||||
x = xprim
|
x = xprim
|
||||||
# print x
|
# print x
|
||||||
cls = self.compute_cls(x)
|
cls = self.compute_cls(x)
|
||||||
M_0 = np.mean(x, axis=0)
|
M_0 = np.mean(x, axis=0)
|
||||||
|
|
@ -1156,11 +1156,11 @@ class DGPLVM_T(Prior):
|
||||||
|
|
||||||
# This function calculates derivative of the log of prior function
|
# This function calculates derivative of the log of prior function
|
||||||
def lnpdf_grad(self, x):
|
def lnpdf_grad(self, x):
|
||||||
x = x.reshape(self.x_shape)
|
x = x.reshape(self.x_shape)
|
||||||
xprim = x.dot(self.vec)
|
xprim = x.dot(self.vec)
|
||||||
x = xprim
|
x = xprim
|
||||||
# print x
|
# print x
|
||||||
cls = self.compute_cls(x)
|
cls = self.compute_cls(x)
|
||||||
M_0 = np.mean(x, axis=0)
|
M_0 = np.mean(x, axis=0)
|
||||||
M_i = self.compute_Mi(cls)
|
M_i = self.compute_Mi(cls)
|
||||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,16 @@ class Transformation(object):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
def finv(self, model_param):
|
def finv(self, model_param):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
def log_jacobian(self, model_param):
|
||||||
|
"""
|
||||||
|
compute the log of the jacobian of f, evaluated at f(x)= model_param
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
|
def log_jacobian_grad(self, model_param):
|
||||||
|
"""
|
||||||
|
compute the drivative of the log of the jacobian of f, evaluated at f(x)= model_param
|
||||||
|
"""
|
||||||
|
raise NotImplementedError
|
||||||
def gradfactor(self, model_param, dL_dmodel_param):
|
def gradfactor(self, model_param, dL_dmodel_param):
|
||||||
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
|
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
|
||||||
|
|
||||||
|
|
@ -74,9 +84,33 @@ class Logexp(Transformation):
|
||||||
if np.any(f < 0.):
|
if np.any(f < 0.):
|
||||||
print("Warning: changing parameters to satisfy constraints")
|
print("Warning: changing parameters to satisfy constraints")
|
||||||
return np.abs(f)
|
return np.abs(f)
|
||||||
|
def log_jacobian(self, model_param):
|
||||||
|
return np.where(model_param>_lim_val, model_param, np.log(np.exp(model_param+1e-20) - 1.)) - model_param
|
||||||
|
def log_jacobian_grad(self, model_param):
|
||||||
|
return 1./(np.exp(model_param)-1.)
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return '+ve'
|
return '+ve'
|
||||||
|
|
||||||
|
class Exponent(Transformation):
|
||||||
|
domain = _POSITIVE
|
||||||
|
def f(self, x):
|
||||||
|
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
|
||||||
|
def finv(self, x):
|
||||||
|
return np.log(x)
|
||||||
|
def gradfactor(self, f, df):
|
||||||
|
return np.einsum('i,i->i', df, f)
|
||||||
|
def initialize(self, f):
|
||||||
|
if np.any(f < 0.):
|
||||||
|
print("Warning: changing parameters to satisfy constraints")
|
||||||
|
return np.abs(f)
|
||||||
|
def log_jacobian(self, model_param):
|
||||||
|
return np.log(model_param)
|
||||||
|
def log_jacobian_grad(self, model_param):
|
||||||
|
return 1./model_param
|
||||||
|
def __str__(self):
|
||||||
|
return '+ve'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class NormalTheta(Transformation):
|
class NormalTheta(Transformation):
|
||||||
"Do not use, not officially supported!"
|
"Do not use, not officially supported!"
|
||||||
|
|
@ -417,22 +451,6 @@ class LogexpClipped(Logexp):
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return '+ve_c'
|
return '+ve_c'
|
||||||
|
|
||||||
class Exponent(Transformation):
|
|
||||||
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
|
|
||||||
domain = _POSITIVE
|
|
||||||
def f(self, x):
|
|
||||||
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
|
|
||||||
def finv(self, x):
|
|
||||||
return np.log(x)
|
|
||||||
def gradfactor(self, f, df):
|
|
||||||
return np.einsum('i,i->i', df, f)
|
|
||||||
def initialize(self, f):
|
|
||||||
if np.any(f < 0.):
|
|
||||||
print("Warning: changing parameters to satisfy constraints")
|
|
||||||
return np.abs(f)
|
|
||||||
def __str__(self):
|
|
||||||
return '+ve'
|
|
||||||
|
|
||||||
class NegativeExponent(Exponent):
|
class NegativeExponent(Exponent):
|
||||||
domain = _NEGATIVE
|
domain = _NEGATIVE
|
||||||
def f(self, x):
|
def f(self, x):
|
||||||
|
|
|
||||||
|
|
@ -36,8 +36,9 @@ class NormalPrior(VariationalPrior):
|
||||||
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
|
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
|
||||||
|
|
||||||
class SpikeAndSlabPrior(VariationalPrior):
|
class SpikeAndSlabPrior(VariationalPrior):
|
||||||
def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw):
|
def __init__(self, pi=None, learnPi=False, variance = 1.0, group_spike=False, name='SpikeAndSlabPrior', **kw):
|
||||||
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
|
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
|
||||||
|
self.group_spike = group_spike
|
||||||
self.variance = Param('variance',variance)
|
self.variance = Param('variance',variance)
|
||||||
self.learnPi = learnPi
|
self.learnPi = learnPi
|
||||||
if learnPi:
|
if learnPi:
|
||||||
|
|
@ -50,7 +51,10 @@ class SpikeAndSlabPrior(VariationalPrior):
|
||||||
def KL_divergence(self, variational_posterior):
|
def KL_divergence(self, variational_posterior):
|
||||||
mu = variational_posterior.mean
|
mu = variational_posterior.mean
|
||||||
S = variational_posterior.variance
|
S = variational_posterior.variance
|
||||||
gamma = variational_posterior.gamma.values
|
if self.group_spike:
|
||||||
|
gamma = variational_posterior.gamma.values[0]
|
||||||
|
else:
|
||||||
|
gamma = variational_posterior.gamma.values
|
||||||
if len(self.pi.shape)==2:
|
if len(self.pi.shape)==2:
|
||||||
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
|
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
|
||||||
pi = self.pi[idx]
|
pi = self.pi[idx]
|
||||||
|
|
@ -65,14 +69,21 @@ class SpikeAndSlabPrior(VariationalPrior):
|
||||||
def update_gradients_KL(self, variational_posterior):
|
def update_gradients_KL(self, variational_posterior):
|
||||||
mu = variational_posterior.mean
|
mu = variational_posterior.mean
|
||||||
S = variational_posterior.variance
|
S = variational_posterior.variance
|
||||||
gamma = variational_posterior.gamma.values
|
if self.group_spike:
|
||||||
|
gamma = variational_posterior.gamma.values[0]
|
||||||
|
else:
|
||||||
|
gamma = variational_posterior.gamma.values
|
||||||
if len(self.pi.shape)==2:
|
if len(self.pi.shape)==2:
|
||||||
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
|
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
|
||||||
pi = self.pi[idx]
|
pi = self.pi[idx]
|
||||||
else:
|
else:
|
||||||
pi = self.pi
|
pi = self.pi
|
||||||
|
|
||||||
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
if self.group_spike:
|
||||||
|
dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))/variational_posterior.num_data
|
||||||
|
else:
|
||||||
|
dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))
|
||||||
|
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||||
mu.gradient -= gamma*mu/self.variance
|
mu.gradient -= gamma*mu/self.variance
|
||||||
S.gradient -= (1./self.variance - 1./S) * gamma /2.
|
S.gradient -= (1./self.variance - 1./S) * gamma /2.
|
||||||
if self.learnPi:
|
if self.learnPi:
|
||||||
|
|
@ -154,13 +165,31 @@ class SpikeAndSlabPosterior(VariationalPosterior):
|
||||||
'''
|
'''
|
||||||
The SpikeAndSlab distribution for variational approximations.
|
The SpikeAndSlab distribution for variational approximations.
|
||||||
'''
|
'''
|
||||||
def __init__(self, means, variances, binary_prob, name='latent space'):
|
def __init__(self, means, variances, binary_prob, group_spike=False, sharedX=False, name='latent space'):
|
||||||
"""
|
"""
|
||||||
binary_prob : the probability of the distribution on the slab part.
|
binary_prob : the probability of the distribution on the slab part.
|
||||||
"""
|
"""
|
||||||
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
|
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
|
||||||
self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
|
self.group_spike = group_spike
|
||||||
self.link_parameter(self.gamma)
|
self.sharedX = sharedX
|
||||||
|
if sharedX:
|
||||||
|
self.mean.fix(warning=False)
|
||||||
|
self.variance.fix(warning=False)
|
||||||
|
if group_spike:
|
||||||
|
self.gamma_group = Param("binary_prob_group",binary_prob.mean(axis=0),Logistic(1e-10,1.-1e-10))
|
||||||
|
self.gamma = Param("binary_prob",binary_prob, __fixed__)
|
||||||
|
self.link_parameters(self.gamma_group,self.gamma)
|
||||||
|
else:
|
||||||
|
self.gamma = Param("binary_prob",binary_prob,Logistic(1e-10,1.-1e-10))
|
||||||
|
self.link_parameter(self.gamma)
|
||||||
|
|
||||||
|
def propogate_val(self):
|
||||||
|
if self.group_spike:
|
||||||
|
self.gamma.values[:] = self.gamma_group.values
|
||||||
|
|
||||||
|
def collate_gradient(self):
|
||||||
|
if self.group_spike:
|
||||||
|
self.gamma_group.gradient = self.gamma.gradient.reshape(self.gamma.shape).sum(axis=0)
|
||||||
|
|
||||||
def set_gradients(self, grad):
|
def set_gradients(self, grad):
|
||||||
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
|
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
|
||||||
|
|
@ -179,15 +208,15 @@ class SpikeAndSlabPosterior(VariationalPosterior):
|
||||||
n.parameters[dc['variance']._parent_index_] = dc['variance']
|
n.parameters[dc['variance']._parent_index_] = dc['variance']
|
||||||
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
|
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
|
||||||
n._gradient_array_ = None
|
n._gradient_array_ = None
|
||||||
oversize = self.size - self.mean.size - self.variance.size
|
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size
|
||||||
n.size = n.mean.size + n.variance.size + oversize
|
n.size = n.mean.size + n.variance.size + n.gamma.size + oversize
|
||||||
n.ndim = n.mean.ndim
|
n.ndim = n.mean.ndim
|
||||||
n.shape = n.mean.shape
|
n.shape = n.mean.shape
|
||||||
n.num_data = n.mean.shape[0]
|
n.num_data = n.mean.shape[0]
|
||||||
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
|
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
|
||||||
return n
|
return n
|
||||||
else:
|
else:
|
||||||
return super(VariationalPrior, self).__getitem__(s)
|
return super(SpikeAndSlabPosterior, self).__getitem__(s)
|
||||||
|
|
||||||
def plot(self, *args, **kwargs):
|
def plot(self, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -133,7 +133,7 @@ class SparseGP(GP):
|
||||||
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
|
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
|
||||||
elif self.posterior.woodbury_inv.ndim == 3:
|
elif self.posterior.woodbury_inv.ndim == 3:
|
||||||
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
|
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
|
||||||
for i in range(var.shape[1]):
|
for i in range(var.shape[2]):
|
||||||
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
|
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
|
||||||
var = var
|
var = var
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ class SVGP(SparseGP):
|
||||||
num_latent_functions = Y.shape[1]
|
num_latent_functions = Y.shape[1]
|
||||||
|
|
||||||
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
|
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
|
||||||
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,num_latent_functions)))
|
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[None,:,:], (num_latent_functions, 1,1)))
|
||||||
self.chol = Param('q_u_chol', chol)
|
self.chol = Param('q_u_chol', chol)
|
||||||
self.link_parameter(self.chol)
|
self.link_parameter(self.chol)
|
||||||
self.link_parameter(self.m)
|
self.link_parameter(self.m)
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,10 @@ from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import datetime
|
||||||
|
|
||||||
def exponents(fnow, current_grad):
|
def exponents(fnow, current_grad):
|
||||||
exps = [np.abs(np.float(fnow)), current_grad]
|
exps = [np.abs(np.float(fnow)), 1 if current_grad is np.nan else current_grad]
|
||||||
return np.sign(exps) * np.log10(exps).astype(int)
|
return np.sign(exps) * np.log10(exps).astype(int)
|
||||||
|
|
||||||
class VerboseOptimization(object):
|
class VerboseOptimization(object):
|
||||||
|
|
@ -23,6 +24,7 @@ class VerboseOptimization(object):
|
||||||
self.model.add_observer(self, self.print_status)
|
self.model.add_observer(self, self.print_status)
|
||||||
self.status = 'running'
|
self.status = 'running'
|
||||||
self.clear = clear_after_finish
|
self.clear = clear_after_finish
|
||||||
|
self.deltat = .2
|
||||||
|
|
||||||
self.update()
|
self.update()
|
||||||
|
|
||||||
|
|
@ -44,25 +46,25 @@ class VerboseOptimization(object):
|
||||||
self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
|
self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
|
||||||
|
|
||||||
display(self.hor_align)
|
display(self.hor_align)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.text.set_css('width', '100%')
|
self.text.set_css('width', '100%')
|
||||||
left_col.set_css({
|
left_col.set_css({
|
||||||
'padding': '2px',
|
'padding': '2px',
|
||||||
'width': "100%",
|
'width': "100%",
|
||||||
})
|
})
|
||||||
|
|
||||||
right_col.set_css({
|
right_col.set_css({
|
||||||
'padding': '2px',
|
'padding': '2px',
|
||||||
})
|
})
|
||||||
|
|
||||||
self.hor_align.set_css({
|
self.hor_align.set_css({
|
||||||
'width': "100%",
|
'width': "100%",
|
||||||
})
|
})
|
||||||
|
|
||||||
self.hor_align.remove_class('vbox')
|
self.hor_align.remove_class('vbox')
|
||||||
self.hor_align.add_class('hbox')
|
self.hor_align.add_class('hbox')
|
||||||
|
|
||||||
left_col.add_class("box-flex1")
|
left_col.add_class("box-flex1")
|
||||||
right_col.add_class('box-flex0')
|
right_col.add_class('box-flex0')
|
||||||
|
|
||||||
|
|
@ -74,16 +76,31 @@ class VerboseOptimization(object):
|
||||||
else:
|
else:
|
||||||
self.exps = exponents(self.fnow, self.current_gradient)
|
self.exps = exponents(self.fnow, self.current_gradient)
|
||||||
print('Running {} Code:'.format(self.opt_name))
|
print('Running {} Code:'.format(self.opt_name))
|
||||||
print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters))
|
print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "runtime", mi=self.len_maxiters))
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
self.start = time.time()
|
self.start = time.time()
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def print_out(self):
|
def print_out(self, seconds):
|
||||||
|
if seconds<60:
|
||||||
|
ms = (seconds%1)*100
|
||||||
|
self.timestring = "{s:0>2d}s{ms:0>2d}".format(s=int(seconds), ms=int(ms))
|
||||||
|
else:
|
||||||
|
m, s = divmod(seconds, 60)
|
||||||
|
if m>59:
|
||||||
|
h, m = divmod(m, 60)
|
||||||
|
if h>23:
|
||||||
|
d, h = divmod(h, 24)
|
||||||
|
self.timestring = '{d:0>2d}d{h:0>2d}h{m:0>2d}'.format(m=int(m), h=int(h), d=int(d))
|
||||||
|
else:
|
||||||
|
self.timestring = '{h:0>2d}h{m:0>2d}m{s:0>2d}'.format(m=int(m), s=int(s), h=int(h))
|
||||||
|
else:
|
||||||
|
ms = (seconds%1)*100
|
||||||
|
self.timestring = '{m:0>2d}m{s:0>2d}s{ms:0>2d}'.format(m=int(m), s=int(s), ms=int(ms))
|
||||||
if self.ipython_notebook:
|
if self.ipython_notebook:
|
||||||
names_vals = [['optimizer', "{:s}".format(self.opt_name)],
|
names_vals = [['optimizer', "{:s}".format(self.opt_name)],
|
||||||
['runtime [s]', "{:> g}".format(time.time()-self.start)],
|
['runtime', "{:>s}".format(self.timestring)],
|
||||||
['evaluation', "{:>0{l}}".format(self.iteration, l=self.len_maxiters)],
|
['evaluation', "{:>0{l}}".format(self.iteration, l=self.len_maxiters)],
|
||||||
['objective', "{: > 12.3E}".format(self.fnow)],
|
['objective', "{: > 12.3E}".format(self.fnow)],
|
||||||
['||gradient||', "{: >+12.3E}".format(float(self.current_gradient))],
|
['||gradient||', "{: >+12.3E}".format(float(self.current_gradient))],
|
||||||
|
|
@ -120,14 +137,18 @@ class VerboseOptimization(object):
|
||||||
if b:
|
if b:
|
||||||
self.exps = n_exps
|
self.exps = n_exps
|
||||||
print('\r', end=' ')
|
print('\r', end=' ')
|
||||||
print('{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
|
print('{3:} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), "{:>8s}".format(self.timestring), mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
def print_status(self, me, which=None):
|
def print_status(self, me, which=None):
|
||||||
self.update()
|
self.update()
|
||||||
|
|
||||||
|
seconds = time.time()-self.start
|
||||||
#sys.stdout.write(" "*len(self.message))
|
#sys.stdout.write(" "*len(self.message))
|
||||||
self.print_out()
|
self.deltat += seconds
|
||||||
|
if self.deltat > .2:
|
||||||
|
self.print_out(seconds)
|
||||||
|
self.deltat = 0
|
||||||
|
|
||||||
self.iteration += 1
|
self.iteration += 1
|
||||||
|
|
||||||
|
|
@ -153,12 +174,12 @@ class VerboseOptimization(object):
|
||||||
if self.verbose:
|
if self.verbose:
|
||||||
self.stop = time.time()
|
self.stop = time.time()
|
||||||
self.model.remove_observer(self)
|
self.model.remove_observer(self)
|
||||||
self.print_out()
|
self.print_out(self.stop - self.start)
|
||||||
|
|
||||||
if not self.ipython_notebook:
|
if not self.ipython_notebook:
|
||||||
print()
|
print()
|
||||||
print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start))
|
print('Runtime: {}'.format("{:>9s}".format(self.timestring)))
|
||||||
print('Optimization status: {0}'.format(self.status))
|
print('Optimization status: {0}'.format(self.status))
|
||||||
print()
|
print()
|
||||||
elif self.clear:
|
elif self.clear:
|
||||||
self.hor_align.close()
|
self.hor_align.close()
|
||||||
|
|
|
||||||
|
|
@ -217,9 +217,8 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
|
||||||
elif model_type == 'FITC':
|
elif model_type == 'FITC':
|
||||||
m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
|
m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
|
||||||
m['.*len'] = 3.
|
m['.*len'] = 3.
|
||||||
|
|
||||||
if optimize:
|
if optimize:
|
||||||
m.pseudo_EM()
|
m.optimize()
|
||||||
|
|
||||||
if plot:
|
if plot:
|
||||||
m.plot()
|
m.plot()
|
||||||
|
|
|
||||||
|
|
@ -355,13 +355,13 @@ def ssgplvm_simulation(optimize=True, verbose=1,
|
||||||
Y = Ylist[0]
|
Y = Ylist[0]
|
||||||
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||||
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
|
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
|
||||||
m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k)
|
m = SSGPLVM(Y, Q, init="rand", num_inducing=num_inducing, kernel=k, group_spike=True)
|
||||||
m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
|
m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
|
||||||
m.likelihood.variance = .1
|
m.likelihood.variance = .01
|
||||||
|
|
||||||
if optimize:
|
if optimize:
|
||||||
print("Optimizing model:")
|
print("Optimizing model:")
|
||||||
m.optimize('scg', messages=verbose, max_iters=max_iters,
|
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
|
||||||
gtol=.05)
|
gtol=.05)
|
||||||
if plot:
|
if plot:
|
||||||
m.X.plot("SSGPLVM Latent Space 1D")
|
m.X.plot("SSGPLVM Latent Space 1D")
|
||||||
|
|
|
||||||
|
|
@ -45,17 +45,23 @@ class InferenceX(Model):
|
||||||
super(InferenceX, self).__init__(name)
|
super(InferenceX, self).__init__(name)
|
||||||
self.likelihood = model.likelihood.copy()
|
self.likelihood = model.likelihood.copy()
|
||||||
self.kern = model.kern.copy()
|
self.kern = model.kern.copy()
|
||||||
if model.kern.useGPU:
|
# if model.kern.useGPU:
|
||||||
from ...models import SSGPLVM
|
# from ...models import SSGPLVM
|
||||||
if isinstance(model, SSGPLVM):
|
# if isinstance(model, SSGPLVM):
|
||||||
self.kern.GPU_SSRBF(True)
|
# self.kern.GPU_SSRBF(True)
|
||||||
else:
|
# else:
|
||||||
self.kern.GPU(True)
|
# self.kern.GPU(True)
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
self.posterior = deepcopy(model.posterior)
|
self.posterior = deepcopy(model.posterior)
|
||||||
if hasattr(model, 'variational_prior'):
|
if hasattr(model, 'variational_prior'):
|
||||||
self.uncertain_input = True
|
self.uncertain_input = True
|
||||||
self.variational_prior = model.variational_prior.copy()
|
from ...models.ss_gplvm import IBPPrior
|
||||||
|
from ...models.ss_mrd import IBPPrior_SSMRD
|
||||||
|
if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
|
||||||
|
from ...core.parameterization.variational import SpikeAndSlabPrior
|
||||||
|
self.variational_prior = SpikeAndSlabPrior(pi=05,learnPi=False, group_spike=False)
|
||||||
|
else:
|
||||||
|
self.variational_prior = model.variational_prior.copy()
|
||||||
else:
|
else:
|
||||||
self.uncertain_input = False
|
self.uncertain_input = False
|
||||||
if hasattr(model, 'inducing_inputs'):
|
if hasattr(model, 'inducing_inputs'):
|
||||||
|
|
@ -147,9 +153,9 @@ class InferenceX(Model):
|
||||||
from ...core.parameterization.variational import SpikeAndSlabPrior
|
from ...core.parameterization.variational import SpikeAndSlabPrior
|
||||||
if isinstance(self.variational_prior, SpikeAndSlabPrior):
|
if isinstance(self.variational_prior, SpikeAndSlabPrior):
|
||||||
# Update Log-likelihood
|
# Update Log-likelihood
|
||||||
KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0])
|
KL_div = self.variational_prior.KL_divergence(self.X)
|
||||||
# update for the KL divergence
|
# update for the KL divergence
|
||||||
self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0])
|
self.variational_prior.update_gradients_KL(self.X)
|
||||||
else:
|
else:
|
||||||
# Update Log-likelihood
|
# Update Log-likelihood
|
||||||
KL_div = self.variational_prior.KL_divergence(self.X)
|
KL_div = self.variational_prior.KL_divergence(self.X)
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ from ...util import linalg
|
||||||
from ...util import choleskies
|
from ...util import choleskies
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .posterior import Posterior
|
from .posterior import Posterior
|
||||||
|
from scipy.linalg.blas import dgemm, dsymm, dtrmm
|
||||||
|
|
||||||
class SVGP(LatentFunctionInference):
|
class SVGP(LatentFunctionInference):
|
||||||
|
|
||||||
|
|
@ -16,16 +17,13 @@ class SVGP(LatentFunctionInference):
|
||||||
|
|
||||||
|
|
||||||
S = np.empty((num_outputs, num_inducing, num_inducing))
|
S = np.empty((num_outputs, num_inducing, num_inducing))
|
||||||
[np.dot(L[:,:,i], L[:,:,i].T, S[i,:,:]) for i in range(num_outputs)]
|
[np.dot(L[i,:,:], L[i,:,:].T, S[i,:,:]) for i in range(num_outputs)]
|
||||||
S = S.swapaxes(0,2)
|
|
||||||
#Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
|
#Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
|
||||||
Si = choleskies.multiple_dpotri(L)
|
Si = choleskies.multiple_dpotri(L)
|
||||||
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
|
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[i,:,:])))) for i in range(L.shape[0])])
|
||||||
|
|
||||||
if np.any(np.isinf(Si)):
|
if np.any(np.isinf(Si)):
|
||||||
raise ValueError("Cholesky representation unstable")
|
raise ValueError("Cholesky representation unstable")
|
||||||
#S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
|
|
||||||
#Si, Lnew, _,_ = linalg.pdinv(S)
|
|
||||||
|
|
||||||
#compute mean function stuff
|
#compute mean function stuff
|
||||||
if mean_function is not None:
|
if mean_function is not None:
|
||||||
|
|
@ -35,27 +33,31 @@ class SVGP(LatentFunctionInference):
|
||||||
prior_mean_u = np.zeros((num_inducing, num_outputs))
|
prior_mean_u = np.zeros((num_inducing, num_outputs))
|
||||||
prior_mean_f = np.zeros((num_data, num_outputs))
|
prior_mean_f = np.zeros((num_data, num_outputs))
|
||||||
|
|
||||||
|
|
||||||
#compute kernel related stuff
|
#compute kernel related stuff
|
||||||
Kmm = kern.K(Z)
|
Kmm = kern.K(Z)
|
||||||
Knm = kern.K(X, Z)
|
Kmn = kern.K(Z, X)
|
||||||
Knn_diag = kern.Kdiag(X)
|
Knn_diag = kern.Kdiag(X)
|
||||||
Kmmi, Lm, Lmi, logdetKmm = linalg.pdinv(Kmm)
|
Lm = linalg.jitchol(Kmm)
|
||||||
|
logdetKmm = 2.*np.sum(np.log(np.diag(Lm)))
|
||||||
|
Kmmi, _ = linalg.dpotri(Lm)
|
||||||
|
|
||||||
#compute the marginal means and variances of q(f)
|
#compute the marginal means and variances of q(f)
|
||||||
A = np.dot(Knm, Kmmi)
|
A, _ = linalg.dpotrs(Lm, Kmn)
|
||||||
mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
|
mu = prior_mean_f + np.dot(A.T, q_u_mean - prior_mean_u)
|
||||||
#v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jlk->ilk', A, S),1)
|
v = np.empty((num_data, num_outputs))
|
||||||
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S),1)
|
for i in range(num_outputs):
|
||||||
|
tmp = dtrmm(1.0,L[i].T, A, lower=0, trans_a=0)
|
||||||
|
v[:,i] = np.sum(np.square(tmp),0)
|
||||||
|
v += (Knn_diag - np.sum(A*Kmn,0))[:,None]
|
||||||
|
|
||||||
#compute the KL term
|
#compute the KL term
|
||||||
Kmmim = np.dot(Kmmi, q_u_mean)
|
Kmmim = np.dot(Kmmi, q_u_mean)
|
||||||
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[:,:,None]*S,0).sum(0) + 0.5*np.sum(q_u_mean*Kmmim,0)
|
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[None,:,:]*S,1).sum(1) + 0.5*np.sum(q_u_mean*Kmmim,0)
|
||||||
KL = KLs.sum()
|
KL = KLs.sum()
|
||||||
#gradient of the KL term (assuming zero mean function)
|
#gradient of the KL term (assuming zero mean function)
|
||||||
dKL_dm = Kmmim.copy()
|
dKL_dm = Kmmim.copy()
|
||||||
dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
|
dKL_dS = 0.5*(Kmmi[None,:,:] - Si)
|
||||||
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
|
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(0)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
|
||||||
|
|
||||||
if mean_function is not None:
|
if mean_function is not None:
|
||||||
#adjust KL term for mean function
|
#adjust KL term for mean function
|
||||||
|
|
@ -80,17 +82,20 @@ class SVGP(LatentFunctionInference):
|
||||||
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale
|
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale
|
||||||
|
|
||||||
#derivatives of expected likelihood, assuming zero mean function
|
#derivatives of expected likelihood, assuming zero mean function
|
||||||
Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
|
Adv = A[None,:,:]*dF_dv.T[:,None,:] # As if dF_Dv is diagonal, D, M, N
|
||||||
Admu = A.T.dot(dF_dmu)
|
Admu = A.dot(dF_dmu)
|
||||||
AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
|
Adv = np.ascontiguousarray(Adv) # makes for faster operations later...(inc dsymm)
|
||||||
#tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
|
AdvA = np.dot(Adv.reshape(-1, num_data),A.T).reshape(num_outputs, num_inducing, num_inducing )
|
||||||
tmp = linalg.ijk_jlk_to_il(AdvA, S).dot(Kmmi)
|
tmp = np.sum([np.dot(a,s) for a, s in zip(AdvA, S)],0).dot(Kmmi)
|
||||||
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
|
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(0) - tmp - tmp.T
|
||||||
dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
|
dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
|
||||||
#tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
|
tmp = S.reshape(-1, num_inducing).dot(Kmmi).reshape(num_outputs, num_inducing , num_inducing )
|
||||||
tmp = 2.*(linalg.ij_jlk_to_ilk(Kmmi, S) - np.eye(num_inducing)[:,:,None])
|
tmp = 2.*(tmp - np.eye(num_inducing)[None, :,:])
|
||||||
#dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
|
|
||||||
dF_dKmn = linalg.ijk_jlk_to_il(tmp, Adv) + Kmmim.dot(dF_dmu.T)
|
dF_dKmn = Kmmim.dot(dF_dmu.T)
|
||||||
|
for a,b in zip(tmp, Adv):
|
||||||
|
dF_dKmn += np.dot(a.T, b)
|
||||||
|
|
||||||
dF_dm = Admu
|
dF_dm = Admu
|
||||||
dF_dS = AdvA
|
dF_dS = AdvA
|
||||||
|
|
||||||
|
|
@ -106,11 +111,11 @@ class SVGP(LatentFunctionInference):
|
||||||
log_marginal = F.sum() - KL
|
log_marginal = F.sum() - KL
|
||||||
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
|
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
|
||||||
|
|
||||||
dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
|
dL_dchol = 2.*np.array([np.dot(a,b) for a, b in zip(dL_dS, L) ])
|
||||||
dL_dchol = choleskies.triang_to_flat(dL_dchol)
|
dL_dchol = choleskies.triang_to_flat(dL_dchol)
|
||||||
|
|
||||||
grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
|
grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
|
||||||
if mean_function is not None:
|
if mean_function is not None:
|
||||||
grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
|
grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
|
||||||
grad_dict['dL_dmfX'] = dF_dmfX
|
grad_dict['dL_dmfX'] = dF_dmfX
|
||||||
return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
|
return Posterior(mean=q_u_mean, cov=S.T, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
|
||||||
|
|
|
||||||
|
|
@ -142,7 +142,7 @@ class opt_lbfgsb(Optimizer):
|
||||||
|
|
||||||
#a more helpful error message is available in opt_result in the Error case
|
#a more helpful error message is available in opt_result in the Error case
|
||||||
if opt_result[2]['warnflag']==2:
|
if opt_result[2]['warnflag']==2:
|
||||||
self.status = 'Error' + opt_result[2]['task']
|
self.status = 'Error' + str(opt_result[2]['task'])
|
||||||
|
|
||||||
class opt_simplex(Optimizer):
|
class opt_simplex(Optimizer):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
|
|
||||||
|
|
@ -19,5 +19,5 @@ from ._src.splitKern import SplitKern,DEtime
|
||||||
from ._src.splitKern import DEtime as DiffGenomeKern
|
from ._src.splitKern import DEtime as DiffGenomeKern
|
||||||
|
|
||||||
|
|
||||||
from _src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
|
from ._src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import numpy as np
|
||||||
from ...core.parameterization import Param
|
from ...core.parameterization import Param
|
||||||
from ...core.parameterization.transformations import Logexp
|
from ...core.parameterization.transformations import Logexp
|
||||||
from ...util.config import config # for assesing whether to use cython
|
from ...util.config import config # for assesing whether to use cython
|
||||||
import coregionalize_cython
|
from . import coregionalize_cython
|
||||||
|
|
||||||
class Coregionalize(Kern):
|
class Coregionalize(Kern):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -105,7 +105,7 @@ class IndependentOutputs(CombinationKernel):
|
||||||
if X2 is None:
|
if X2 is None:
|
||||||
# TODO: make use of index_to_slices
|
# TODO: make use of index_to_slices
|
||||||
# FIXME: Broken as X is already sliced out
|
# FIXME: Broken as X is already sliced out
|
||||||
print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
|
print("Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!")
|
||||||
values = np.unique(X[:,self.index_dim])
|
values = np.unique(X[:,self.index_dim])
|
||||||
slices = [X[:,self.index_dim]==i for i in values]
|
slices = [X[:,self.index_dim]==i for i in values]
|
||||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
||||||
|
|
|
||||||
|
|
@ -78,7 +78,7 @@ class MLP(Kern):
|
||||||
*((vec1[:, None]+vec2[None, :])*self.weight_variance
|
*((vec1[:, None]+vec2[None, :])*self.weight_variance
|
||||||
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
|
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
|
||||||
|
|
||||||
def update_gradients_diag(self, X):
|
def update_gradients_diag(self, dL_dKdiag, X):
|
||||||
self._K_diag_computations(X)
|
self._K_diag_computations(X)
|
||||||
self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)
|
self.variance.gradient = np.sum(self._K_diag_dvar*dL_dKdiag)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,6 @@ class RBF(Stationary):
|
||||||
_support_GPU = True
|
_support_GPU = True
|
||||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
|
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
|
||||||
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
|
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
|
||||||
self.psicomp = PSICOMP_RBF()
|
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
self.psicomp = PSICOMP_RBF_GPU()
|
self.psicomp = PSICOMP_RBF_GPU()
|
||||||
else:
|
else:
|
||||||
|
|
@ -36,6 +35,7 @@ class RBF(Stationary):
|
||||||
dc = super(RBF, self).__getstate__()
|
dc = super(RBF, self).__getstate__()
|
||||||
if self.useGPU:
|
if self.useGPU:
|
||||||
dc['psicomp'] = PSICOMP_RBF()
|
dc['psicomp'] = PSICOMP_RBF()
|
||||||
|
dc['useGPU'] = False
|
||||||
return dc
|
return dc
|
||||||
|
|
||||||
def __setstate__(self, state):
|
def __setstate__(self, state):
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,9 @@ from ...util.config import config # for assesing whether to use cython
|
||||||
from ...util.caching import Cache_this
|
from ...util.caching import Cache_this
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import stationary_cython
|
from . import stationary_cython
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print('warning: failed to import cython module: falling back to numpy')
|
print('warning in sationary: failed to import cython module: falling back to numpy')
|
||||||
config.set('cython', 'working', 'false')
|
config.set('cython', 'working', 'false')
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,7 +1,9 @@
|
||||||
#cython: boundscheck=False
|
#cython: boundscheck=False
|
||||||
|
#cython: nonecheck=False
|
||||||
#cython: wraparound=False
|
#cython: wraparound=False
|
||||||
import numpy as np
|
import numpy as np
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
|
from cython.parallel import prange
|
||||||
|
|
||||||
ctypedef np.float64_t DTYPE_t
|
ctypedef np.float64_t DTYPE_t
|
||||||
|
|
||||||
|
|
@ -22,7 +24,18 @@ def grad_X(int N, int D, int M,
|
||||||
cdef double *grad = <double*> _grad.data
|
cdef double *grad = <double*> _grad.data
|
||||||
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
|
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
|
||||||
|
|
||||||
def lengthscale_grads(int N, int M, int Q,
|
def grad_X_cython(int N, int D, int M, double[:,:] X, double[:,:] X2, double[:,:] tmp, double[:,:] grad):
|
||||||
|
cdef int n,d,nd,m
|
||||||
|
for nd in prange(N*D, nogil=True):
|
||||||
|
n = nd/D
|
||||||
|
d = nd%D
|
||||||
|
grad[n,d] = 0.0
|
||||||
|
for m in range(M):
|
||||||
|
grad[n,d] += tmp[n,m]*(X[n,d]-X2[m,d])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def lengthscale_grads_in_c(int N, int M, int Q,
|
||||||
np.ndarray[DTYPE_t, ndim=2] _tmp,
|
np.ndarray[DTYPE_t, ndim=2] _tmp,
|
||||||
np.ndarray[DTYPE_t, ndim=2] _X,
|
np.ndarray[DTYPE_t, ndim=2] _X,
|
||||||
np.ndarray[DTYPE_t, ndim=2] _X2,
|
np.ndarray[DTYPE_t, ndim=2] _X2,
|
||||||
|
|
@ -33,4 +46,14 @@ def lengthscale_grads(int N, int M, int Q,
|
||||||
cdef double *grad = <double*> _grad.data
|
cdef double *grad = <double*> _grad.data
|
||||||
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
|
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
|
||||||
|
|
||||||
|
def lengthscale_grads(int N, int M, int Q, double[:,:] tmp, double[:,:] X, double[:,:] X2, double[:] grad):
|
||||||
|
cdef int q, n, m
|
||||||
|
cdef double gradq, dist
|
||||||
|
for q in range(Q):
|
||||||
|
grad[q] = 0.0
|
||||||
|
for n in range(N):
|
||||||
|
for m in range(M):
|
||||||
|
dist = X[n,q] - X2[m,q]
|
||||||
|
grad[q] += tmp[n,m]*dist*dist
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,36 @@
|
||||||
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
|
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
|
||||||
int n,m,d;
|
|
||||||
double retnd;
|
double retnd;
|
||||||
//#pragma omp parallel for private(n,d, retnd, m)
|
int n,d,nd,m;
|
||||||
for(d=0;d<D;d++){
|
#pragma omp parallel for private(nd,n,d, retnd, m)
|
||||||
for(n=0;n<N;n++){
|
for(nd=0;nd<(D*N);nd++){
|
||||||
retnd = 0.0;
|
n = nd/D;
|
||||||
for(m=0;m<M;m++){
|
d = nd%D;
|
||||||
retnd += tmp[n*M+m]*(X[n*D+d]-X2[m*D+d]);
|
retnd = 0.0;
|
||||||
}
|
for(m=0;m<M;m++){
|
||||||
grad[n*D+d] = retnd;
|
retnd += tmp[n*M+m]*(X[nd]-X2[m*D+d]);
|
||||||
}
|
}
|
||||||
|
grad[nd] = retnd;
|
||||||
}
|
}
|
||||||
} //grad_X
|
} //grad_X
|
||||||
|
|
||||||
|
|
||||||
|
void _lengthscale_grads_unsafe(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
|
||||||
|
int n,m,nm,q,nQ,mQ;
|
||||||
|
double dist;
|
||||||
|
#pragma omp parallel for private(n,m,nm,q,nQ,mQ,dist)
|
||||||
|
for(nm=0; nm<(N*M); nm++){
|
||||||
|
n = nm/M;
|
||||||
|
m = nm%M;
|
||||||
|
nQ = n*Q;
|
||||||
|
mQ = m*Q;
|
||||||
|
for(q=0; q<Q; q++){
|
||||||
|
dist = X[nQ+q]-X2[mQ+q];
|
||||||
|
grad[q] += tmp[nm]*dist*dist;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} //lengthscale_grads
|
||||||
|
|
||||||
|
|
||||||
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
|
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
|
||||||
int n,m,q;
|
int n,m,q;
|
||||||
double gradq, dist;
|
double gradq, dist;
|
||||||
|
|
@ -33,3 +50,5 @@ for(q=0; q<Q; q++){
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -143,7 +143,7 @@ class Likelihood(Parameterized):
|
||||||
|
|
||||||
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
|
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
|
||||||
for yi, mi, vi, yi_m in zipped_values])
|
for yi, mi, vi, yi_m in zipped_values])
|
||||||
p_ystar = np.array(p_ystar).reshape(-1, 1)
|
p_ystar = np.array(p_ystar).reshape(*y_test.shape)
|
||||||
return np.log(p_ystar)
|
return np.log(p_ystar)
|
||||||
|
|
||||||
def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
|
def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
|
||||||
|
|
@ -173,6 +173,7 @@ class Likelihood(Parameterized):
|
||||||
|
|
||||||
from scipy.misc import logsumexp
|
from scipy.misc import logsumexp
|
||||||
log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
|
log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
|
||||||
|
log_p_ystar = np.array(log_p_ystar).reshape(*y_test.shape)
|
||||||
return log_p_ystar
|
return log_p_ystar
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -265,8 +266,8 @@ class Likelihood(Parameterized):
|
||||||
stop
|
stop
|
||||||
|
|
||||||
if self.size:
|
if self.size:
|
||||||
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
|
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None], Y_metadata=Y_metadata) # Ntheta x (orig size) x N_{quad_points}
|
||||||
dF_dtheta = np.dot(dF_dtheta, gh_w)
|
dF_dtheta = np.dot(dF_dtheta, gh_w)/np.sqrt(np.pi)
|
||||||
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
|
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
|
||||||
else:
|
else:
|
||||||
dF_dtheta = None # Not yet implemented
|
dF_dtheta = None # Not yet implemented
|
||||||
|
|
|
||||||
|
|
@ -64,9 +64,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
||||||
self.logger.debug("creating inference_method var_dtc")
|
self.logger.debug("creating inference_method var_dtc")
|
||||||
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
|
inference_method = VarDTC(limit=1 if not missing_data else Y.shape[1])
|
||||||
|
|
||||||
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
|
|
||||||
kernel.psicomp.GPU_direct = True
|
|
||||||
|
|
||||||
super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
|
super(BayesianGPLVMMiniBatch,self).__init__(X, Y, Z, kernel, likelihood=likelihood,
|
||||||
name=name, inference_method=inference_method,
|
name=name, inference_method=inference_method,
|
||||||
normalizer=normalizer,
|
normalizer=normalizer,
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from bayesian_gplvm import BayesianGPLVM
|
from .bayesian_gplvm import BayesianGPLVM
|
||||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
||||||
|
|
||||||
class DPBayesianGPLVM(BayesianGPLVM):
|
class DPBayesianGPLVM(BayesianGPLVM):
|
||||||
|
|
@ -15,5 +15,5 @@ class DPBayesianGPLVM(BayesianGPLVM):
|
||||||
name='bayesian gplvm', mpi_comm=None, normalizer=None,
|
name='bayesian gplvm', mpi_comm=None, normalizer=None,
|
||||||
missing_data=False, stochastic=False, batchsize=1):
|
missing_data=False, stochastic=False, batchsize=1):
|
||||||
super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm')
|
super(DPBayesianGPLVM,self).__init__(Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init, num_inducing=num_inducing, Z=Z, kernel=kernel, inference_method=inference_method, likelihood=likelihood, mpi_comm=mpi_comm, normalizer=normalizer, missing_data=missing_data, stochastic=stochastic, batchsize=batchsize, name='dp bayesian gplvm')
|
||||||
self.X.mean.set_prior(X_prior)
|
self.X.mean.set_prior(X_prior)
|
||||||
self.link_parameter(X_prior)
|
self.link_parameter(X_prior)
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ class GPRegression(GP):
|
||||||
:param Y: observed values
|
:param Y: observed values
|
||||||
:param kernel: a GPy kernel, defaults to rbf
|
:param kernel: a GPy kernel, defaults to rbf
|
||||||
:param Norm normalizer: [False]
|
:param Norm normalizer: [False]
|
||||||
|
:param noise_var: the noise variance for Gaussian likelhood, defaults to 1.
|
||||||
|
|
||||||
Normalize Y with the norm given.
|
Normalize Y with the norm given.
|
||||||
If normalizer is False, no normalization will be done
|
If normalizer is False, no normalization will be done
|
||||||
|
|
@ -25,12 +26,12 @@ class GPRegression(GP):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None):
|
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1.):
|
||||||
|
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.RBF(X.shape[1])
|
kernel = kern.RBF(X.shape[1])
|
||||||
|
|
||||||
likelihood = likelihoods.Gaussian()
|
likelihood = likelihoods.Gaussian(variance=noise_var)
|
||||||
|
|
||||||
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)
|
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ log_2_pi = np.log(2*np.pi)
|
||||||
|
|
||||||
class GPVariationalGaussianApproximation(Model):
|
class GPVariationalGaussianApproximation(Model):
|
||||||
"""
|
"""
|
||||||
The Variational Gaussian Approximation revisited implementation for regression
|
The Variational Gaussian Approximation revisited
|
||||||
|
|
||||||
@article{Opper:2009,
|
@article{Opper:2009,
|
||||||
title = {The Variational Gaussian Approximation Revisited},
|
title = {The Variational Gaussian Approximation Revisited},
|
||||||
|
|
@ -25,44 +25,27 @@ class GPVariationalGaussianApproximation(Model):
|
||||||
pages = {786--792},
|
pages = {786--792},
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
def __init__(self, X, Y, kernel=None):
|
def __init__(self, X, Y, kernel, likelihood,Y_metadata=None):
|
||||||
Model.__init__(self,'Variational GP classification')
|
Model.__init__(self,'Variational GP classification')
|
||||||
# accept the construction arguments
|
# accept the construction arguments
|
||||||
self.X = ObsAr(X)
|
self.X = ObsAr(X)
|
||||||
if kernel is None:
|
self.Y = Y
|
||||||
kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
|
|
||||||
self.kern = kernel
|
|
||||||
self.link_parameter(self.kern)
|
|
||||||
self.num_data, self.input_dim = self.X.shape
|
self.num_data, self.input_dim = self.X.shape
|
||||||
|
self.Y_metadata = Y_metadata
|
||||||
|
|
||||||
self.alpha = Param('alpha', np.zeros(self.num_data))
|
self.kern = kernel
|
||||||
|
self.likelihood = likelihood
|
||||||
|
self.link_parameter(self.kern)
|
||||||
|
self.link_parameter(self.likelihood)
|
||||||
|
|
||||||
|
self.alpha = Param('alpha', np.zeros((self.num_data,1))) # only one latent fn for now.
|
||||||
self.beta = Param('beta', np.ones(self.num_data))
|
self.beta = Param('beta', np.ones(self.num_data))
|
||||||
self.link_parameter(self.alpha)
|
self.link_parameter(self.alpha)
|
||||||
self.link_parameter(self.beta)
|
self.link_parameter(self.beta)
|
||||||
|
|
||||||
self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
|
|
||||||
self.Ysign = np.where(Y==1, 1, -1).flatten()
|
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
"""
|
|
||||||
Marginal log likelihood evaluation
|
|
||||||
"""
|
|
||||||
return self._log_lik
|
return self._log_lik
|
||||||
|
|
||||||
def likelihood_quadrature(self, m, v):
|
|
||||||
"""
|
|
||||||
Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
|
|
||||||
"""
|
|
||||||
# assume probit for now.
|
|
||||||
X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
|
|
||||||
p = stats.norm.cdf(X)
|
|
||||||
N = stats.norm.pdf(X)
|
|
||||||
F = np.log(p).dot(self.gh_w)
|
|
||||||
NoverP = N/p
|
|
||||||
dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w)
|
|
||||||
dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
|
|
||||||
return F, dF_dm, dF_dv
|
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
K = self.kern.K(self.X)
|
K = self.kern.K(self.X)
|
||||||
m = K.dot(self.alpha)
|
m = K.dot(self.alpha)
|
||||||
|
|
@ -71,13 +54,14 @@ class GPVariationalGaussianApproximation(Model):
|
||||||
A = np.eye(self.num_data) + BKB
|
A = np.eye(self.num_data) + BKB
|
||||||
Ai, LA, _, Alogdet = pdinv(A)
|
Ai, LA, _, Alogdet = pdinv(A)
|
||||||
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
|
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
|
||||||
var = np.diag(Sigma)
|
var = np.diag(Sigma).reshape(-1,1)
|
||||||
|
|
||||||
F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
|
F, dF_dm, dF_dv, dF_dthetaL = self.likelihood.variational_expectations(self.Y, m, var, Y_metadata=self.Y_metadata)
|
||||||
|
self.likelihood.gradient = dF_dthetaL.sum(1).sum(1)
|
||||||
dF_da = np.dot(K, dF_dm)
|
dF_da = np.dot(K, dF_dm)
|
||||||
SigmaB = Sigma*self.beta
|
SigmaB = Sigma*self.beta
|
||||||
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
|
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv.flatten())).dot(SigmaB))*2
|
||||||
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
|
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + np.sum(m*self.alpha))
|
||||||
dKL_da = m
|
dKL_da = m
|
||||||
A_A2 = Ai - Ai.dot(Ai)
|
A_A2 = Ai - Ai.dot(Ai)
|
||||||
dKL_db = np.diag(np.dot(KB.T, A_A2))
|
dKL_db = np.diag(np.dot(KB.T, A_A2))
|
||||||
|
|
@ -86,12 +70,12 @@ class GPVariationalGaussianApproximation(Model):
|
||||||
self.beta.gradient = dF_db - dKL_db
|
self.beta.gradient = dF_db - dKL_db
|
||||||
|
|
||||||
# K-gradients
|
# K-gradients
|
||||||
dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
|
dKL_dK = 0.5*(self.alpha*self.alpha.T + self.beta[:, None]*self.beta[None, :]*A_A2)
|
||||||
tmp = Ai*self.beta[:, None]/self.beta[None, :]
|
tmp = Ai*self.beta[:, None]/self.beta[None, :]
|
||||||
dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
|
dF_dK = self.alpha*dF_dm.T + np.dot(tmp*dF_dv, tmp.T)
|
||||||
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
|
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
|
||||||
|
|
||||||
def predict(self, Xnew):
|
def _raw_predict(self, Xnew):
|
||||||
"""
|
"""
|
||||||
Predict the function(s) at the new point(s) Xnew.
|
Predict the function(s) at the new point(s) Xnew.
|
||||||
|
|
||||||
|
|
@ -105,4 +89,4 @@ class GPVariationalGaussianApproximation(Model):
|
||||||
Kxx = self.kern.Kdiag(Xnew)
|
Kxx = self.kern.Kdiag(Xnew)
|
||||||
var = Kxx - np.sum(WiKux*Kux, 0)
|
var = Kxx - np.sum(WiKux*Kux, 0)
|
||||||
|
|
||||||
return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))
|
return mu, var.reshape(-1,1)
|
||||||
|
|
|
||||||
|
|
@ -228,14 +228,14 @@ class HessianChecker(GradientChecker):
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
if block_indices:
|
if block_indices:
|
||||||
print "\nBlock {}".format(block_indices)
|
print("\nBlock {}".format(block_indices))
|
||||||
else:
|
else:
|
||||||
print "\nAll blocks"
|
print("\nAll blocks")
|
||||||
|
|
||||||
header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
|
header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
|
||||||
header_string = map(lambda x: ' | '.join(header), [header])
|
header_string = map(lambda x: ' | '.join(header), [header])
|
||||||
separator = '-' * len(header_string[0])
|
separator = '-' * len(header_string[0])
|
||||||
print '\n'.join([header_string[0], separator])
|
print('\n'.join([header_string[0], separator]))
|
||||||
min_r = '%.6f' % float(numpy.min(ratio))
|
min_r = '%.6f' % float(numpy.min(ratio))
|
||||||
max_r = '%.6f' % float(numpy.max(ratio))
|
max_r = '%.6f' % float(numpy.max(ratio))
|
||||||
max_d = '%.6f' % float(numpy.max(difference))
|
max_d = '%.6f' % float(numpy.max(difference))
|
||||||
|
|
@ -248,7 +248,7 @@ class HessianChecker(GradientChecker):
|
||||||
checked = "\033[91m False \033[0m"
|
checked = "\033[91m False \033[0m"
|
||||||
|
|
||||||
grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
|
grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
|
||||||
print grad_string
|
print(grad_string)
|
||||||
|
|
||||||
if plot:
|
if plot:
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
|
|
@ -348,7 +348,7 @@ class SkewChecker(HessianChecker):
|
||||||
numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
|
numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
|
||||||
numeric_hess = numeric_hess_partial(x)
|
numeric_hess = numeric_hess_partial(x)
|
||||||
|
|
||||||
print "Done making numerical hessian"
|
print("Done making numerical hessian")
|
||||||
if analytic_hess.dtype is np.dtype('object'):
|
if analytic_hess.dtype is np.dtype('object'):
|
||||||
#Blockify numeric_hess aswell
|
#Blockify numeric_hess aswell
|
||||||
blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
|
blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
|
||||||
|
|
@ -365,7 +365,7 @@ class SkewChecker(HessianChecker):
|
||||||
#Unless super_plot is set, just plot the first one
|
#Unless super_plot is set, just plot the first one
|
||||||
p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
|
p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
|
||||||
if verbose:
|
if verbose:
|
||||||
print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
|
print("Checking derivative of hessian wrt parameter number {}".format(block_ind))
|
||||||
check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
|
check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
|
||||||
|
|
||||||
current_index += current_size
|
current_index += current_size
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,95 @@ import numpy as np
|
||||||
|
|
||||||
from ..core.sparse_gp_mpi import SparseGP_MPI
|
from ..core.sparse_gp_mpi import SparseGP_MPI
|
||||||
from .. import kern
|
from .. import kern
|
||||||
|
from ..core.parameterization import Param
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
|
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior,VariationalPrior
|
||||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
|
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
|
||||||
from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU
|
from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU
|
||||||
|
|
||||||
|
class IBPPosterior(SpikeAndSlabPosterior):
|
||||||
|
'''
|
||||||
|
The SpikeAndSlab distribution for variational approximations.
|
||||||
|
'''
|
||||||
|
def __init__(self, means, variances, binary_prob, tau=None, sharedX=False, name='latent space'):
|
||||||
|
"""
|
||||||
|
binary_prob : the probability of the distribution on the slab part.
|
||||||
|
"""
|
||||||
|
from ..core.parameterization.transformations import Logexp
|
||||||
|
super(IBPPosterior, self).__init__(means, variances, binary_prob, group_spike=True, name=name)
|
||||||
|
self.sharedX = sharedX
|
||||||
|
if sharedX:
|
||||||
|
self.mean.fix(warning=False)
|
||||||
|
self.variance.fix(warning=False)
|
||||||
|
self.tau = Param("tau_", np.ones((self.gamma_group.shape[0],2)), Logexp())
|
||||||
|
self.link_parameter(self.tau)
|
||||||
|
|
||||||
|
def set_gradients(self, grad):
|
||||||
|
self.mean.gradient, self.variance.gradient, self.gamma.gradient, self.tau.gradient = grad
|
||||||
|
|
||||||
|
def __getitem__(self, s):
|
||||||
|
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
|
||||||
|
import copy
|
||||||
|
n = self.__new__(self.__class__, self.name)
|
||||||
|
dc = self.__dict__.copy()
|
||||||
|
dc['mean'] = self.mean[s]
|
||||||
|
dc['variance'] = self.variance[s]
|
||||||
|
dc['binary_prob'] = self.binary_prob[s]
|
||||||
|
dc['tau'] = self.tau
|
||||||
|
dc['parameters'] = copy.copy(self.parameters)
|
||||||
|
n.__dict__.update(dc)
|
||||||
|
n.parameters[dc['mean']._parent_index_] = dc['mean']
|
||||||
|
n.parameters[dc['variance']._parent_index_] = dc['variance']
|
||||||
|
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
|
||||||
|
n.parameters[dc['tau']._parent_index_] = dc['tau']
|
||||||
|
n._gradient_array_ = None
|
||||||
|
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size - self.tau.size
|
||||||
|
n.size = n.mean.size + n.variance.size + n.gamma.size+ n.tau.size + oversize
|
||||||
|
n.ndim = n.mean.ndim
|
||||||
|
n.shape = n.mean.shape
|
||||||
|
n.num_data = n.mean.shape[0]
|
||||||
|
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
|
||||||
|
return n
|
||||||
|
else:
|
||||||
|
return super(IBPPosterior, self).__getitem__(s)
|
||||||
|
|
||||||
|
class IBPPrior(VariationalPrior):
|
||||||
|
def __init__(self, input_dim, alpha =2., name='IBPPrior', **kw):
|
||||||
|
super(IBPPrior, self).__init__(name=name, **kw)
|
||||||
|
from ..core.parameterization.transformations import Logexp, __fixed__
|
||||||
|
self.input_dim = input_dim
|
||||||
|
self.variance = 1.
|
||||||
|
self.alpha = Param('alpha', alpha, __fixed__)
|
||||||
|
self.link_parameter(self.alpha)
|
||||||
|
|
||||||
|
def KL_divergence(self, variational_posterior):
|
||||||
|
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
|
||||||
|
|
||||||
|
var_mean = np.square(mu)/self.variance
|
||||||
|
var_S = (S/self.variance - np.log(S))
|
||||||
|
part1 = (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
|
||||||
|
|
||||||
|
ad = self.alpha/self.input_dim
|
||||||
|
from scipy.special import betaln,digamma
|
||||||
|
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + betaln(ad,1.)*self.input_dim \
|
||||||
|
-betaln(tau[:,0], tau[:,1]).sum() + ((tau[:,0]-gamma-ad)*digamma(tau[:,0])).sum() + \
|
||||||
|
((tau[:,1]+gamma-2.)*digamma(tau[:,1])).sum() + ((2.+ad-tau[:,0]-tau[:,1])*digamma(tau.sum(axis=1))).sum()
|
||||||
|
|
||||||
|
return part1+part2
|
||||||
|
|
||||||
|
def update_gradients_KL(self, variational_posterior):
|
||||||
|
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
|
||||||
|
|
||||||
|
variational_posterior.mean.gradient -= gamma*mu/self.variance
|
||||||
|
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * gamma /2.
|
||||||
|
from scipy.special import digamma,polygamma
|
||||||
|
dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
|
||||||
|
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||||
|
ad = self.alpha/self.input_dim
|
||||||
|
common = (ad+2-tau[:,0]-tau[:,1])*polygamma(1,tau.sum(axis=1))
|
||||||
|
variational_posterior.tau.gradient[:,0] = -((tau[:,0]-gamma-ad)*polygamma(1,tau[:,0])+common)
|
||||||
|
variational_posterior.tau.gradient[:,1] = -((tau[:,1]+gamma-2)*polygamma(1,tau[:,1])+common)
|
||||||
|
|
||||||
class SSGPLVM(SparseGP_MPI):
|
class SSGPLVM(SparseGP_MPI):
|
||||||
"""
|
"""
|
||||||
Spike-and-Slab Gaussian Process Latent Variable Model
|
Spike-and-Slab Gaussian Process Latent Variable Model
|
||||||
|
|
@ -23,9 +107,11 @@ class SSGPLVM(SparseGP_MPI):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
|
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
|
||||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True,normalizer=False, **kwargs):
|
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, IBP=False, alpha=2., tau=None, mpi_comm=None, pi=None, learnPi=False,normalizer=False, sharedX=False, variational_prior=None,**kwargs):
|
||||||
|
|
||||||
self.group_spike = group_spike
|
self.group_spike = group_spike
|
||||||
|
self.init = init
|
||||||
|
self.sharedX = sharedX
|
||||||
|
|
||||||
if X == None:
|
if X == None:
|
||||||
from ..util.initialization import initialize_latent
|
from ..util.initialization import initialize_latent
|
||||||
|
|
@ -33,8 +119,6 @@ class SSGPLVM(SparseGP_MPI):
|
||||||
else:
|
else:
|
||||||
fracs = np.ones(input_dim)
|
fracs = np.ones(input_dim)
|
||||||
|
|
||||||
self.init = init
|
|
||||||
|
|
||||||
if X_variance is None: # The variance of the variational approximation (S)
|
if X_variance is None: # The variance of the variational approximation (S)
|
||||||
X_variance = np.random.uniform(0,.1,X.shape)
|
X_variance = np.random.uniform(0,.1,X.shape)
|
||||||
|
|
||||||
|
|
@ -64,17 +148,16 @@ class SSGPLVM(SparseGP_MPI):
|
||||||
if pi is None:
|
if pi is None:
|
||||||
pi = np.empty((input_dim))
|
pi = np.empty((input_dim))
|
||||||
pi[:] = 0.5
|
pi[:] = 0.5
|
||||||
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi) # the prior probability of the latent binary variable b
|
|
||||||
|
if IBP:
|
||||||
X = SpikeAndSlabPosterior(X, X_variance, gamma)
|
self.variational_prior = IBPPrior(input_dim=input_dim, alpha=alpha) if variational_prior is None else variational_prior
|
||||||
|
X = IBPPosterior(X, X_variance, gamma, tau=tau,sharedX=sharedX)
|
||||||
|
else:
|
||||||
|
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi, group_spike=group_spike) if variational_prior is None else variational_prior
|
||||||
|
X = SpikeAndSlabPosterior(X, X_variance, gamma, group_spike=group_spike,sharedX=sharedX)
|
||||||
|
|
||||||
super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
|
super(SSGPLVM,self).__init__(X, Y, Z, kernel, likelihood, variational_prior=self.variational_prior, inference_method=inference_method, name=name, mpi_comm=mpi_comm, normalizer=normalizer, **kwargs)
|
||||||
# self.X.unfix()
|
|
||||||
# self.X.variance.constrain_positive()
|
|
||||||
self.link_parameter(self.X, index=0)
|
self.link_parameter(self.X, index=0)
|
||||||
|
|
||||||
if self.group_spike:
|
|
||||||
[self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in range(self.X.gamma.shape[1])] # Tie columns together
|
|
||||||
|
|
||||||
def set_X_gradients(self, X, X_grad):
|
def set_X_gradients(self, X, X_grad):
|
||||||
"""Set the gradients of the posterior distribution of X in its specific form."""
|
"""Set the gradients of the posterior distribution of X in its specific form."""
|
||||||
|
|
@ -84,9 +167,15 @@ class SSGPLVM(SparseGP_MPI):
|
||||||
"""Get the gradients of the posterior distribution of X in its specific form."""
|
"""Get the gradients of the posterior distribution of X in its specific form."""
|
||||||
return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient
|
return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient
|
||||||
|
|
||||||
|
def _propogate_X_val(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
|
self.X.propogate_val()
|
||||||
|
if self.sharedX: self._highest_parent_._propogate_X_val()
|
||||||
super(SSGPLVM,self).parameters_changed()
|
super(SSGPLVM,self).parameters_changed()
|
||||||
if isinstance(self.inference_method, VarDTC_minibatch):
|
if isinstance(self.inference_method, VarDTC_minibatch):
|
||||||
|
self.X.collate_gradient()
|
||||||
return
|
return
|
||||||
|
|
||||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||||
|
|
@ -95,6 +184,7 @@ class SSGPLVM(SparseGP_MPI):
|
||||||
|
|
||||||
# update for the KL divergence
|
# update for the KL divergence
|
||||||
self.variational_prior.update_gradients_KL(self.X)
|
self.variational_prior.update_gradients_KL(self.X)
|
||||||
|
self.X.collate_gradient()
|
||||||
|
|
||||||
def input_sensitivity(self):
|
def input_sensitivity(self):
|
||||||
if self.kern.ARD:
|
if self.kern.ARD:
|
||||||
|
|
|
||||||
|
|
@ -2,33 +2,256 @@
|
||||||
The Maniforld Relevance Determination model with the spike-and-slab prior
|
The Maniforld Relevance Determination model with the spike-and-slab prior
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
from ..core import Model
|
from ..core import Model
|
||||||
from .ss_gplvm import SSGPLVM
|
from .ss_gplvm import SSGPLVM
|
||||||
|
from ..core.parameterization.variational import SpikeAndSlabPrior,NormalPosterior,VariationalPrior
|
||||||
|
from ..util.misc import param_to_array
|
||||||
|
from ..kern import RBF
|
||||||
|
from ..core import Param
|
||||||
|
from numpy.linalg.linalg import LinAlgError
|
||||||
|
|
||||||
class SSMRD(Model):
|
class SSMRD(Model):
|
||||||
|
|
||||||
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
|
def __init__(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx = 'PCA_concat', initz = 'permute',
|
||||||
initx = 'PCA', initz = 'permute',
|
num_inducing=10, Zs=None, kernels=None, inference_methods=None, likelihoods=None, group_spike=True,
|
||||||
num_inducing=10, Z=None, kernel=None,
|
pi=0.5, name='ss_mrd', Ynames=None, mpi_comm=None, IBP=False, alpha=2., taus=None, ):
|
||||||
inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
|
|
||||||
super(SSMRD, self).__init__(name)
|
super(SSMRD, self).__init__(name)
|
||||||
|
self.mpi_comm = mpi_comm
|
||||||
|
self._PROPAGATE_ = False
|
||||||
|
|
||||||
self.updates = False
|
# initialize X for individual models
|
||||||
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx,
|
X, X_variance, Gammas, fracs = self._init_X(Ylist, input_dim, X, X_variance, Gammas, initx)
|
||||||
kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods,
|
self.X = NormalPosterior(means=X, variances=X_variance)
|
||||||
name='model_'+str(i)) for i,y in enumerate(Ylist)]
|
|
||||||
self.add_parameters(*(self.models))
|
|
||||||
|
|
||||||
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.mean.shape[1])]
|
if kernels is None:
|
||||||
for i in range(self.models[0].X.mean.shape[0])]
|
kernels = [RBF(input_dim, lengthscale=1./fracs, ARD=True) for i in xrange(len(Ylist))]
|
||||||
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in range(len(self.models))] for j in range(self.models[0].X.variance.shape[1])]
|
if Zs is None:
|
||||||
for i in range(self.models[0].X.variance.shape[0])]
|
Zs = [None]* len(Ylist)
|
||||||
|
if likelihoods is None:
|
||||||
|
likelihoods = [None]* len(Ylist)
|
||||||
|
if inference_methods is None:
|
||||||
|
inference_methods = [None]* len(Ylist)
|
||||||
|
|
||||||
self.updates = True
|
if IBP:
|
||||||
|
self.var_priors = [IBPPrior_SSMRD(len(Ylist),input_dim,alpha=alpha) for i in xrange(len(Ylist))]
|
||||||
|
else:
|
||||||
|
self.var_priors = [SpikeAndSlabPrior_SSMRD(nModels=len(Ylist),pi=pi,learnPi=False, group_spike=group_spike) for i in xrange(len(Ylist))]
|
||||||
|
self.models = [SSGPLVM(y, input_dim, X=X.copy(), X_variance=X_variance.copy(), Gamma=Gammas[i], num_inducing=num_inducing,Z=Zs[i], learnPi=False, group_spike=group_spike,
|
||||||
|
kernel=kernels[i],inference_method=inference_methods[i],likelihood=likelihoods[i], variational_prior=self.var_priors[i], IBP=IBP, tau=None if taus is None else taus[i],
|
||||||
|
name='model_'+str(i), mpi_comm=mpi_comm, sharedX=True) for i,y in enumerate(Ylist)]
|
||||||
|
self.link_parameters(*(self.models+[self.X]))
|
||||||
|
|
||||||
|
def _propogate_X_val(self):
|
||||||
|
if self._PROPAGATE_: return
|
||||||
|
for m in self.models:
|
||||||
|
m.X.mean.values[:] = self.X.mean.values
|
||||||
|
m.X.variance.values[:] = self.X.variance.values
|
||||||
|
varp_list = [m.X for m in self.models]
|
||||||
|
[vp._update_inernal(varp_list) for vp in self.var_priors]
|
||||||
|
self._PROPAGATE_=True
|
||||||
|
|
||||||
|
def _collate_X_gradient(self):
|
||||||
|
self._PROPAGATE_ = False
|
||||||
|
self.X.mean.gradient[:] = 0
|
||||||
|
self.X.variance.gradient[:] = 0
|
||||||
|
for m in self.models:
|
||||||
|
self.X.mean.gradient += m.X.mean.gradient
|
||||||
|
self.X.variance.gradient += m.X.variance.gradient
|
||||||
|
|
||||||
def parameters_changed(self):
|
def parameters_changed(self):
|
||||||
super(SSMRD, self).parameters_changed()
|
super(SSMRD, self).parameters_changed()
|
||||||
|
[m.parameters_changed() for m in self.models]
|
||||||
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
|
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
|
||||||
|
self._collate_X_gradient()
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return self._log_marginal_likelihood
|
return self._log_marginal_likelihood
|
||||||
|
|
||||||
|
def _init_X(self, Ylist, input_dim, X=None, X_variance=None, Gammas=None, initx='PCA_concat'):
|
||||||
|
|
||||||
|
# Divide latent dimensions
|
||||||
|
idx = np.empty((input_dim,),dtype=np.int)
|
||||||
|
residue = (input_dim)%(len(Ylist))
|
||||||
|
for i in xrange(len(Ylist)):
|
||||||
|
if i < residue:
|
||||||
|
size = input_dim/len(Ylist)+1
|
||||||
|
idx[i*size:(i+1)*size] = i
|
||||||
|
else:
|
||||||
|
size = input_dim/len(Ylist)
|
||||||
|
idx[i*size+residue:(i+1)*size+residue] = i
|
||||||
|
|
||||||
|
if X is None:
|
||||||
|
if initx == 'PCA_concat':
|
||||||
|
X = np.empty((Ylist[0].shape[0],input_dim))
|
||||||
|
fracs = np.empty((input_dim,))
|
||||||
|
from ..util.initialization import initialize_latent
|
||||||
|
for i in xrange(len(Ylist)):
|
||||||
|
Y = Ylist[i]
|
||||||
|
dim = (idx==i).sum()
|
||||||
|
if dim>0:
|
||||||
|
x, fr = initialize_latent('PCA', dim, Y)
|
||||||
|
X[:,idx==i] = x
|
||||||
|
fracs[idx==i] = fr
|
||||||
|
elif initx=='PCA_joint':
|
||||||
|
y = np.hstack(Ylist)
|
||||||
|
from ..util.initialization import initialize_latent
|
||||||
|
X, fracs = initialize_latent('PCA', input_dim, y)
|
||||||
|
else:
|
||||||
|
X = np.random.randn(Ylist[0].shape[0], input_dim)
|
||||||
|
fracs = np.ones(input_dim)
|
||||||
|
else:
|
||||||
|
fracs = np.ones(input_dim)
|
||||||
|
|
||||||
|
|
||||||
|
if X_variance is None: # The variance of the variational approximation (S)
|
||||||
|
X_variance = np.random.uniform(0,.1,X.shape)
|
||||||
|
|
||||||
|
if Gammas is None:
|
||||||
|
Gammas = []
|
||||||
|
for x in X:
|
||||||
|
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||||
|
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
|
||||||
|
gamma[gamma>1.-1e-9] = 1.-1e-9
|
||||||
|
gamma[gamma<1e-9] = 1e-9
|
||||||
|
Gammas.append(gamma)
|
||||||
|
return X, X_variance, Gammas, fracs
|
||||||
|
|
||||||
|
@Model.optimizer_array.setter
|
||||||
|
def optimizer_array(self, p):
|
||||||
|
if self.mpi_comm != None:
|
||||||
|
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
|
||||||
|
self.mpi_comm.Bcast(np.int32(1),root=0)
|
||||||
|
self.mpi_comm.Bcast(p, root=0)
|
||||||
|
Model.optimizer_array.fset(self,p)
|
||||||
|
|
||||||
|
def optimize(self, optimizer=None, start=None, **kwargs):
|
||||||
|
self._IN_OPTIMIZATION_ = True
|
||||||
|
if self.mpi_comm==None:
|
||||||
|
super(SSMRD, self).optimize(optimizer,start,**kwargs)
|
||||||
|
elif self.mpi_comm.rank==0:
|
||||||
|
super(SSMRD, self).optimize(optimizer,start,**kwargs)
|
||||||
|
self.mpi_comm.Bcast(np.int32(-1),root=0)
|
||||||
|
elif self.mpi_comm.rank>0:
|
||||||
|
x = self.optimizer_array.copy()
|
||||||
|
flag = np.empty(1,dtype=np.int32)
|
||||||
|
while True:
|
||||||
|
self.mpi_comm.Bcast(flag,root=0)
|
||||||
|
if flag==1:
|
||||||
|
try:
|
||||||
|
self.optimizer_array = x
|
||||||
|
self._fail_count = 0
|
||||||
|
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||||
|
if self._fail_count >= self._allowed_failures:
|
||||||
|
raise
|
||||||
|
self._fail_count += 1
|
||||||
|
elif flag==-1:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
self._IN_OPTIMIZATION_ = False
|
||||||
|
raise Exception("Unrecognizable flag for synchronization!")
|
||||||
|
self._IN_OPTIMIZATION_ = False
|
||||||
|
|
||||||
|
|
||||||
|
class SpikeAndSlabPrior_SSMRD(SpikeAndSlabPrior):
|
||||||
|
def __init__(self, nModels, pi=0.5, learnPi=False, group_spike=True, variance = 1.0, name='SSMRDPrior', **kw):
|
||||||
|
self.nModels = nModels
|
||||||
|
self._b_prob_all = 0.5
|
||||||
|
super(SpikeAndSlabPrior_SSMRD, self).__init__(pi=pi,learnPi=learnPi,group_spike=group_spike,variance=variance, name=name, **kw)
|
||||||
|
|
||||||
|
def _update_inernal(self, varp_list):
|
||||||
|
"""Make an update of the internal status by gathering the variational posteriors for all the individual models."""
|
||||||
|
# The probability for the binary variable for the same latent dimension of any of the models is on.
|
||||||
|
if self.group_spike:
|
||||||
|
self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
|
||||||
|
[np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
|
||||||
|
else:
|
||||||
|
self._b_prob_all = 1.-param_to_array(varp_list[0].binary_prob)
|
||||||
|
[np.multiply(self._b_prob_all, 1.-vp.binary_prob, self._b_prob_all) for vp in varp_list[1:]]
|
||||||
|
|
||||||
|
def KL_divergence(self, variational_posterior):
|
||||||
|
mu = variational_posterior.mean
|
||||||
|
S = variational_posterior.variance
|
||||||
|
if self.group_spike:
|
||||||
|
gamma = variational_posterior.binary_prob[0]
|
||||||
|
else:
|
||||||
|
gamma = variational_posterior.binary_prob
|
||||||
|
if len(self.pi.shape)==2:
|
||||||
|
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
|
||||||
|
pi = self.pi[idx]
|
||||||
|
else:
|
||||||
|
pi = self.pi
|
||||||
|
|
||||||
|
var_mean = np.square(mu)/self.variance
|
||||||
|
var_S = (S/self.variance - np.log(S))
|
||||||
|
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
|
||||||
|
return var_gamma +((1.-self._b_prob_all)*(np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
|
||||||
|
|
||||||
|
def update_gradients_KL(self, variational_posterior):
|
||||||
|
mu = variational_posterior.mean
|
||||||
|
S = variational_posterior.variance
|
||||||
|
N = variational_posterior.num_data
|
||||||
|
if self.group_spike:
|
||||||
|
gamma = variational_posterior.binary_prob.values[0]
|
||||||
|
else:
|
||||||
|
gamma = variational_posterior.binary_prob.values
|
||||||
|
if len(self.pi.shape)==2:
|
||||||
|
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
|
||||||
|
pi = self.pi[idx]
|
||||||
|
else:
|
||||||
|
pi = self.pi
|
||||||
|
|
||||||
|
if self.group_spike:
|
||||||
|
tmp = self._b_prob_all/(1.-gamma)
|
||||||
|
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))/N +tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||||
|
else:
|
||||||
|
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||||
|
mu.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
|
||||||
|
S.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
|
||||||
|
if self.learnPi:
|
||||||
|
raise 'Not Supported!'
|
||||||
|
|
||||||
|
class IBPPrior_SSMRD(VariationalPrior):
|
||||||
|
def __init__(self, nModels, input_dim, alpha =2., tau=None, name='IBPPrior', **kw):
|
||||||
|
super(IBPPrior_SSMRD, self).__init__(name=name, **kw)
|
||||||
|
from ..core.parameterization.transformations import Logexp, __fixed__
|
||||||
|
self.nModels = nModels
|
||||||
|
self._b_prob_all = 0.5
|
||||||
|
self.input_dim = input_dim
|
||||||
|
self.variance = 1.
|
||||||
|
self.alpha = Param('alpha', alpha, __fixed__)
|
||||||
|
self.link_parameter(self.alpha)
|
||||||
|
|
||||||
|
def _update_inernal(self, varp_list):
|
||||||
|
"""Make an update of the internal status by gathering the variational posteriors for all the individual models."""
|
||||||
|
# The probability for the binary variable for the same latent dimension of any of the models is on.
|
||||||
|
self._b_prob_all = 1.-param_to_array(varp_list[0].gamma_group)
|
||||||
|
[np.multiply(self._b_prob_all, 1.-vp.gamma_group, self._b_prob_all) for vp in varp_list[1:]]
|
||||||
|
|
||||||
|
def KL_divergence(self, variational_posterior):
|
||||||
|
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
|
||||||
|
|
||||||
|
var_mean = np.square(mu)/self.variance
|
||||||
|
var_S = (S/self.variance - np.log(S))
|
||||||
|
part1 = ((1.-self._b_prob_all)* (np.log(self.variance)-1. +var_mean + var_S)).sum()/(2.*self.nModels)
|
||||||
|
|
||||||
|
ad = self.alpha/self.input_dim
|
||||||
|
from scipy.special import betaln,digamma
|
||||||
|
part2 = (gamma*np.log(gamma)).sum() + ((1.-gamma)*np.log(1.-gamma)).sum() + (betaln(ad,1.)*self.input_dim -betaln(tau[:,0], tau[:,1]).sum())/self.nModels \
|
||||||
|
+ (( (tau[:,0]-ad)/self.nModels -gamma)*digamma(tau[:,0])).sum() + \
|
||||||
|
(((tau[:,1]-1.)/self.nModels+gamma-1.)*digamma(tau[:,1])).sum() + (((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*digamma(tau.sum(axis=1))).sum()
|
||||||
|
return part1+part2
|
||||||
|
|
||||||
|
def update_gradients_KL(self, variational_posterior):
|
||||||
|
mu, S, gamma, tau = variational_posterior.mean.values, variational_posterior.variance.values, variational_posterior.gamma_group.values, variational_posterior.tau.values
|
||||||
|
|
||||||
|
variational_posterior.mean.gradient -= (1.-self._b_prob_all)*mu/(self.variance*self.nModels)
|
||||||
|
variational_posterior.variance.gradient -= (1./self.variance - 1./S) * (1.-self._b_prob_all) /(2.*self.nModels)
|
||||||
|
from scipy.special import digamma,polygamma
|
||||||
|
tmp = self._b_prob_all/(1.-gamma)
|
||||||
|
dgamma = (np.log(gamma/(1.-gamma))+ digamma(tau[:,1])-digamma(tau[:,0]))/variational_posterior.num_data
|
||||||
|
variational_posterior.binary_prob.gradient -= dgamma+tmp*((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||||
|
ad = self.alpha/self.input_dim
|
||||||
|
common = ((1.+ad-tau[:,0]-tau[:,1])/self.nModels+1.)*polygamma(1,tau.sum(axis=1))
|
||||||
|
variational_posterior.tau.gradient[:,0] = -(((tau[:,0]-ad)/self.nModels -gamma)*polygamma(1,tau[:,0])+common)
|
||||||
|
variational_posterior.tau.gradient[:,1] = -(((tau[:,1]-1.)/self.nModels+gamma-1.)*polygamma(1,tau[:,1])+common)
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from base_plots import gpplot, x_frame1D, x_frame2D
|
||||||
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
|
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
|
||||||
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
|
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
|
||||||
from scipy import sparse
|
from scipy import sparse
|
||||||
|
from ...core.parameterization.variational import VariationalPosterior
|
||||||
|
|
||||||
def plot_fit(model, plot_limits=None, which_data_rows='all',
|
def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
which_data_ycols='all', fixed_inputs=[],
|
which_data_ycols='all', fixed_inputs=[],
|
||||||
|
|
@ -78,7 +79,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||||
|
|
||||||
if predict_kw is None:
|
if predict_kw is None:
|
||||||
predict_kw = {}
|
predict_kw = {}
|
||||||
|
|
||||||
#work out what the inputs are for plotting (1D or 2D)
|
#work out what the inputs are for plotting (1D or 2D)
|
||||||
fixed_dims = np.array([i for i,v in fixed_inputs])
|
fixed_dims = np.array([i for i,v in fixed_inputs])
|
||||||
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
|
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
|
||||||
|
|
@ -219,7 +220,7 @@ def plot_fit_f(model, *args, **kwargs):
|
||||||
kwargs['plot_raw'] = True
|
kwargs['plot_raw'] = True
|
||||||
plot_fit(model,*args, **kwargs)
|
plot_fit(model,*args, **kwargs)
|
||||||
|
|
||||||
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True):
|
def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True, X_all=False):
|
||||||
"""
|
"""
|
||||||
Convenience function for returning back fixed_inputs where the other inputs
|
Convenience function for returning back fixed_inputs where the other inputs
|
||||||
are fixed using fix_routine
|
are fixed using fix_routine
|
||||||
|
|
@ -235,8 +236,13 @@ def fixed_inputs(model, non_fixed_inputs, fix_routine='median', as_list=True):
|
||||||
f_inputs = []
|
f_inputs = []
|
||||||
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
|
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
|
||||||
X = model.X.mean.values.copy()
|
X = model.X.mean.values.copy()
|
||||||
else:
|
elif isinstance(model.X, VariationalPosterior):
|
||||||
X = model.X.values.copy()
|
X = model.X.values.copy()
|
||||||
|
else:
|
||||||
|
if X_all:
|
||||||
|
X = model.X_all.copy()
|
||||||
|
else:
|
||||||
|
X = model.X.copy()
|
||||||
for i in range(X.shape[1]):
|
for i in range(X.shape[1]):
|
||||||
if i not in non_fixed_inputs:
|
if i not in non_fixed_inputs:
|
||||||
if fix_routine == 'mean':
|
if fix_routine == 'mean':
|
||||||
|
|
|
||||||
|
|
@ -408,12 +408,13 @@ class mocap_data_show_vpython(vpython_show):
|
||||||
class mocap_data_show(matplotlib_show):
|
class mocap_data_show(matplotlib_show):
|
||||||
"""Base class for visualizing motion capture data."""
|
"""Base class for visualizing motion capture data."""
|
||||||
|
|
||||||
def __init__(self, vals, axes=None, connect=None):
|
def __init__(self, vals, axes=None, connect=None, color='b'):
|
||||||
if axes==None:
|
if axes==None:
|
||||||
fig = plt.figure()
|
fig = plt.figure()
|
||||||
axes = fig.add_subplot(111, projection='3d', aspect='equal')
|
axes = fig.add_subplot(111, projection='3d', aspect='equal')
|
||||||
matplotlib_show.__init__(self, vals, axes)
|
matplotlib_show.__init__(self, vals, axes)
|
||||||
|
|
||||||
|
self.color = color
|
||||||
self.connect = connect
|
self.connect = connect
|
||||||
self.process_values()
|
self.process_values()
|
||||||
self.initialize_axes()
|
self.initialize_axes()
|
||||||
|
|
@ -423,7 +424,7 @@ class mocap_data_show(matplotlib_show):
|
||||||
self.axes.figure.canvas.draw()
|
self.axes.figure.canvas.draw()
|
||||||
|
|
||||||
def draw_vertices(self):
|
def draw_vertices(self):
|
||||||
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2])
|
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2], color=self.color)
|
||||||
|
|
||||||
def draw_edges(self):
|
def draw_edges(self):
|
||||||
self.line_handle = []
|
self.line_handle = []
|
||||||
|
|
@ -442,7 +443,7 @@ class mocap_data_show(matplotlib_show):
|
||||||
z.append(self.vals[i, 2])
|
z.append(self.vals[i, 2])
|
||||||
z.append(self.vals[j, 2])
|
z.append(self.vals[j, 2])
|
||||||
z.append(np.NaN)
|
z.append(np.NaN)
|
||||||
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-')
|
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), '-', color=self.color)
|
||||||
|
|
||||||
def modify(self, vals):
|
def modify(self, vals):
|
||||||
self.vals = vals.copy()
|
self.vals = vals.copy()
|
||||||
|
|
@ -450,7 +451,7 @@ class mocap_data_show(matplotlib_show):
|
||||||
self.initialize_axes_modify()
|
self.initialize_axes_modify()
|
||||||
self.draw_vertices()
|
self.draw_vertices()
|
||||||
self.initialize_axes()
|
self.initialize_axes()
|
||||||
self.finalize_axes_modify()
|
#self.finalize_axes_modify()
|
||||||
self.draw_edges()
|
self.draw_edges()
|
||||||
self.axes.figure.canvas.draw()
|
self.axes.figure.canvas.draw()
|
||||||
|
|
||||||
|
|
@ -469,12 +470,20 @@ class mocap_data_show(matplotlib_show):
|
||||||
self.line_handle[0].remove()
|
self.line_handle[0].remove()
|
||||||
|
|
||||||
def finalize_axes(self):
|
def finalize_axes(self):
|
||||||
self.axes.set_xlim(self.x_lim)
|
# self.axes.set_xlim(self.x_lim)
|
||||||
self.axes.set_ylim(self.y_lim)
|
# self.axes.set_ylim(self.y_lim)
|
||||||
self.axes.set_zlim(self.z_lim)
|
# self.axes.set_zlim(self.z_lim)
|
||||||
self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
|
# self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
|
||||||
|
|
||||||
# self.axes.set_aspect('equal')
|
extents = np.array([getattr(self.axes, 'get_{}lim'.format(dim))() for dim in 'xyz'])
|
||||||
|
sz = extents[:,1] - extents[:,0]
|
||||||
|
centers = np.mean(extents, axis=1)
|
||||||
|
maxsize = max(abs(sz))
|
||||||
|
r = maxsize/2
|
||||||
|
for ctr, dim in zip(centers, 'xyz'):
|
||||||
|
getattr(self.axes, 'set_{}lim'.format(dim))(ctr - r, ctr + r)
|
||||||
|
|
||||||
|
# self.axes.set_aspect('equal')
|
||||||
# self.axes.autoscale(enable=False)
|
# self.axes.autoscale(enable=False)
|
||||||
|
|
||||||
def finalize_axes_modify(self):
|
def finalize_axes_modify(self):
|
||||||
|
|
@ -494,7 +503,7 @@ class stick_show(mocap_data_show):
|
||||||
|
|
||||||
class skeleton_show(mocap_data_show):
|
class skeleton_show(mocap_data_show):
|
||||||
"""data_show class for visualizing motion capture data encoded as a skeleton with angles."""
|
"""data_show class for visualizing motion capture data encoded as a skeleton with angles."""
|
||||||
def __init__(self, vals, skel, axes=None, padding=0):
|
def __init__(self, vals, skel, axes=None, padding=0, color='b'):
|
||||||
"""data_show class for visualizing motion capture data encoded as a skeleton with angles.
|
"""data_show class for visualizing motion capture data encoded as a skeleton with angles.
|
||||||
:param vals: set of modeled angles to use for printing in the axis when it's first created.
|
:param vals: set of modeled angles to use for printing in the axis when it's first created.
|
||||||
:type vals: np.array
|
:type vals: np.array
|
||||||
|
|
@ -506,7 +515,7 @@ class skeleton_show(mocap_data_show):
|
||||||
self.skel = skel
|
self.skel = skel
|
||||||
self.padding = padding
|
self.padding = padding
|
||||||
connect = skel.connection_matrix()
|
connect = skel.connection_matrix()
|
||||||
mocap_data_show.__init__(self, vals, axes=axes, connect=connect)
|
mocap_data_show.__init__(self, vals, axes=axes, connect=connect, color=color)
|
||||||
def process_values(self):
|
def process_values(self):
|
||||||
"""Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting.
|
"""Takes a set of angles and converts them to the x,y,z coordinates in the internal prepresentation of the class, ready for plotting.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,8 @@ These tests make sure that the opure python and cython codes work the same
|
||||||
|
|
||||||
class CythonTestChols(np.testing.TestCase):
|
class CythonTestChols(np.testing.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.flat = np.random.randn(45, 5)
|
self.flat = np.random.randn(45,5)
|
||||||
self.triang = np.dstack([np.eye(20)[:,:,None] for i in range(3)])
|
self.triang = np.array([np.eye(20) for i in range(3)])
|
||||||
def test_flat_to_triang(self):
|
def test_flat_to_triang(self):
|
||||||
L1 = choleskies._flat_to_triang_pure(self.flat)
|
L1 = choleskies._flat_to_triang_pure(self.flat)
|
||||||
L2 = choleskies._flat_to_triang_cython(self.flat)
|
L2 = choleskies._flat_to_triang_cython(self.flat)
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ class LinkFunctionTests(np.testing.TestCase):
|
||||||
self.assertTrue(grad3.checkgrad(verbose=True))
|
self.assertTrue(grad3.checkgrad(verbose=True))
|
||||||
|
|
||||||
if test_lim:
|
if test_lim:
|
||||||
print "Testing limits"
|
print("Testing limits")
|
||||||
#Remove some otherwise we are too close to the limit for gradcheck to work effectively
|
#Remove some otherwise we are too close to the limit for gradcheck to work effectively
|
||||||
lim_of_inf = lim_of_inf - 1e-4
|
lim_of_inf = lim_of_inf - 1e-4
|
||||||
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)
|
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)
|
||||||
|
|
|
||||||
|
|
@ -100,10 +100,10 @@ def block_dot(A, B, diagonal=False):
|
||||||
Dshape = D.shape
|
Dshape = D.shape
|
||||||
if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
|
if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
|
||||||
or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
|
or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
|
||||||
print "Broadcasting, C: {} D:{}".format(C.shape, D.shape)
|
print("Broadcasting, C: {} D:{}".format(C.shape, D.shape))
|
||||||
return C*D
|
return C*D
|
||||||
else:
|
else:
|
||||||
print "Dotting, C: {} C:{}".format(C.shape, D.shape)
|
print("Dotting, C: {} C:{}".format(C.shape, D.shape))
|
||||||
return np.dot(C,D)
|
return np.dot(C,D)
|
||||||
dot = np.vectorize(f, otypes = [np.object])
|
dot = np.vectorize(f, otypes = [np.object])
|
||||||
return dot(A,B)
|
return dot(A,B)
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import numpy as np
|
||||||
from . import linalg
|
from . import linalg
|
||||||
from .config import config
|
from .config import config
|
||||||
|
|
||||||
import choleskies_cython
|
from . import choleskies_cython
|
||||||
|
|
||||||
def safe_root(N):
|
def safe_root(N):
|
||||||
i = np.sqrt(N)
|
i = np.sqrt(N)
|
||||||
|
|
@ -17,12 +17,12 @@ def safe_root(N):
|
||||||
def _flat_to_triang_pure(flat_mat):
|
def _flat_to_triang_pure(flat_mat):
|
||||||
N, D = flat_mat.shape
|
N, D = flat_mat.shape
|
||||||
M = (-1 + safe_root(8*N+1))//2
|
M = (-1 + safe_root(8*N+1))//2
|
||||||
ret = np.zeros((M, M, D))
|
ret = np.zeros((D, M, M))
|
||||||
count = 0
|
for d in range(D):
|
||||||
for m in range(M):
|
count = 0
|
||||||
for mm in range(m+1):
|
for m in range(M):
|
||||||
for d in range(D):
|
for mm in range(m+1):
|
||||||
ret.flat[d + m*D*M + mm*D] = flat_mat.flat[count];
|
ret[d,m, mm] = flat_mat[count, d];
|
||||||
count = count+1
|
count = count+1
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
@ -33,15 +33,15 @@ def _flat_to_triang_cython(flat_mat):
|
||||||
|
|
||||||
|
|
||||||
def _triang_to_flat_pure(L):
|
def _triang_to_flat_pure(L):
|
||||||
M, _, D = L.shape
|
D, _, M = L.shape
|
||||||
|
|
||||||
N = M*(M+1)//2
|
N = M*(M+1)//2
|
||||||
flat = np.empty((N, D))
|
flat = np.empty((N, D))
|
||||||
count = 0;
|
for d in range(D):
|
||||||
for m in range(M):
|
count = 0;
|
||||||
for mm in range(m+1):
|
for m in range(M):
|
||||||
for d in range(D):
|
for mm in range(m+1):
|
||||||
flat.flat[count] = L.flat[d + m*D*M + mm*D];
|
flat[count,d] = L[d, m, mm]
|
||||||
count = count +1
|
count = count +1
|
||||||
return flat
|
return flat
|
||||||
|
|
||||||
|
|
@ -59,12 +59,12 @@ def _backprop_gradient_pure(dL, L):
|
||||||
"""
|
"""
|
||||||
dL_dK = np.tril(dL).copy()
|
dL_dK = np.tril(dL).copy()
|
||||||
N = L.shape[0]
|
N = L.shape[0]
|
||||||
for k in xrange(N - 1, -1, -1):
|
for k in range(N - 1, -1, -1):
|
||||||
for j in xrange(k + 1, N):
|
for j in range(k + 1, N):
|
||||||
for i in xrange(j, N):
|
for i in range(j, N):
|
||||||
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
|
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
|
||||||
dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
|
dL_dK[j, k] -= dL_dK[i, j] * L[i, k]
|
||||||
for j in xrange(k + 1, N):
|
for j in range(k + 1, N):
|
||||||
dL_dK[j, k] /= L[k, k]
|
dL_dK[j, k] /= L[k, k]
|
||||||
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
|
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
|
||||||
dL_dK[k, k] /= (2 * L[k, k])
|
dL_dK[k, k] /= (2 * L[k, k])
|
||||||
|
|
@ -74,7 +74,7 @@ def triang_to_cov(L):
|
||||||
return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])])
|
return np.dstack([np.dot(L[:,:,i], L[:,:,i].T) for i in range(L.shape[-1])])
|
||||||
|
|
||||||
def multiple_dpotri(Ls):
|
def multiple_dpotri(Ls):
|
||||||
return np.dstack([linalg.dpotri(np.asfortranarray(Ls[:,:,i]), lower=1)[0] for i in range(Ls.shape[-1])])
|
return np.array([linalg.dpotri(np.asfortranarray(Ls[i]), lower=1)[0] for i in range(Ls.shape[0])])
|
||||||
|
|
||||||
def indexes_to_fix_for_low_rank(rank, size):
|
def indexes_to_fix_for_low_rank(rank, size):
|
||||||
"""
|
"""
|
||||||
|
|
@ -100,7 +100,7 @@ def indexes_to_fix_for_low_rank(rank, size):
|
||||||
if config.getboolean('cython', 'working'):
|
if config.getboolean('cython', 'working'):
|
||||||
triang_to_flat = _triang_to_flat_cython
|
triang_to_flat = _triang_to_flat_cython
|
||||||
flat_to_triang = _flat_to_triang_cython
|
flat_to_triang = _flat_to_triang_cython
|
||||||
backprop_gradient = choleskies_cython.backprop_gradient
|
backprop_gradient = choleskies_cython.backprop_gradient_par_c
|
||||||
else:
|
else:
|
||||||
backprop_gradient = _backprop_gradient_pure
|
backprop_gradient = _backprop_gradient_pure
|
||||||
triang_to_flat = _triang_to_flat_pure
|
triang_to_flat = _triang_to_flat_pure
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -5,31 +5,32 @@
|
||||||
# Copyright James Hensman and Alan Saul 2015
|
# Copyright James Hensman and Alan Saul 2015
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from cython.parallel import prange, parallel
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
|
|
||||||
def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
|
def flat_to_triang(np.ndarray[double, ndim=2] flat, int M):
|
||||||
"""take a matrix N x D and return a M X M x D array where
|
"""take a matrix N x D and return a D X M x M array where
|
||||||
|
|
||||||
N = M(M+1)/2
|
N = M(M+1)/2
|
||||||
|
|
||||||
the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat.
|
the lower triangluar portion of the d'th slice of the result is filled by the d'th column of flat.
|
||||||
"""
|
"""
|
||||||
cdef int N = flat.shape[0]
|
|
||||||
cdef int D = flat.shape[1]
|
cdef int D = flat.shape[1]
|
||||||
|
cdef int N = flat.shape[0]
|
||||||
cdef int count = 0
|
cdef int count = 0
|
||||||
cdef np.ndarray[double, ndim=3] ret = np.zeros((M, M, D))
|
cdef np.ndarray[double, ndim=3] ret = np.zeros((D, M, M))
|
||||||
cdef int d, m, mm
|
cdef int d, m, mm
|
||||||
for d in range(D):
|
for d in range(D):
|
||||||
count = 0
|
count = 0
|
||||||
for m in range(M):
|
for m in range(M):
|
||||||
for mm in range(m+1):
|
for mm in range(m+1):
|
||||||
ret[m, mm, d] = flat[count,d]
|
ret[d, m, mm] = flat[count,d]
|
||||||
count += 1
|
count += 1
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def triang_to_flat(np.ndarray[double, ndim=3] L):
|
def triang_to_flat(np.ndarray[double, ndim=3] L):
|
||||||
cdef int M = L.shape[0]
|
cdef int D = L.shape[0]
|
||||||
cdef int D = L.shape[2]
|
cdef int M = L.shape[1]
|
||||||
cdef int N = M*(M+1)/2
|
cdef int N = M*(M+1)/2
|
||||||
cdef int count = 0
|
cdef int count = 0
|
||||||
cdef np.ndarray[double, ndim=2] flat = np.empty((N, D))
|
cdef np.ndarray[double, ndim=2] flat = np.empty((N, D))
|
||||||
|
|
@ -38,7 +39,7 @@ def triang_to_flat(np.ndarray[double, ndim=3] L):
|
||||||
count = 0
|
count = 0
|
||||||
for m in range(M):
|
for m in range(M):
|
||||||
for mm in range(m+1):
|
for mm in range(m+1):
|
||||||
flat[count,d] = L[m, mm, d]
|
flat[count,d] = L[d, m, mm]
|
||||||
count += 1
|
count += 1
|
||||||
return flat
|
return flat
|
||||||
|
|
||||||
|
|
@ -57,3 +58,50 @@ def backprop_gradient(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2]
|
||||||
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
|
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
|
||||||
dL_dK[k, k] /= (2. * L[k, k])
|
dL_dK[k, k] /= (2. * L[k, k])
|
||||||
return dL_dK
|
return dL_dK
|
||||||
|
|
||||||
|
def backprop_gradient_par(double[:,:] dL, double[:,:] L):
|
||||||
|
cdef double[:,:] dL_dK = np.tril(dL).copy()
|
||||||
|
cdef int N = L.shape[0]
|
||||||
|
cdef int k, j, i
|
||||||
|
for k in range(N - 1, -1, -1):
|
||||||
|
with nogil, parallel():
|
||||||
|
for i in prange(k + 1, N):
|
||||||
|
for j in range(k+1, i+1):
|
||||||
|
dL_dK[i, k] -= dL_dK[i, j] * L[j, k]
|
||||||
|
for j in range(i, N):
|
||||||
|
dL_dK[i, k] -= dL_dK[j, i] * L[j, k]
|
||||||
|
for j in range(k + 1, N):
|
||||||
|
dL_dK[j, k] /= L[k, k]
|
||||||
|
dL_dK[k, k] -= L[j, k] * dL_dK[j, k]
|
||||||
|
dL_dK[k, k] /= (2. * L[k, k])
|
||||||
|
return dL_dK
|
||||||
|
|
||||||
|
#here's a pure C version...
|
||||||
|
cdef extern from "cholesky_backprop.h" nogil:
|
||||||
|
void chol_backprop(int N, double* dL, double* L)
|
||||||
|
|
||||||
|
def backprop_gradient_par_c(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
|
||||||
|
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
|
||||||
|
cdef int N = L.shape[0]
|
||||||
|
with nogil:
|
||||||
|
chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
|
||||||
|
return dL_dK
|
||||||
|
|
||||||
|
cdef extern from "cholesky_backprop.h" nogil:
|
||||||
|
void old_chol_backprop(int N, double* dL, double* L)
|
||||||
|
|
||||||
|
def backprop_gradient_par_c_old(np.ndarray[double, ndim=2] dL, np.ndarray[double, ndim=2] L):
|
||||||
|
cdef np.ndarray[double, ndim=2] dL_dK = np.tril(dL) # makes a copy, c-contig
|
||||||
|
cdef int N = L.shape[0]
|
||||||
|
with nogil:
|
||||||
|
old_chol_backprop(N, <double*> dL_dK.data, <double*> L.data)
|
||||||
|
return dL_dK
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
51
GPy/util/cholesky_backprop.c
Normal file
51
GPy/util/cholesky_backprop.c
Normal file
|
|
@ -0,0 +1,51 @@
|
||||||
|
#include <cblas.h>
|
||||||
|
void chol_backprop(int N, double* dL, double* L){
|
||||||
|
//at the input to this fn, dL is df_dL. after this fn is complet, dL is df_dK
|
||||||
|
int i,k;
|
||||||
|
|
||||||
|
dL[N*N - 1] /= (2. * L[N*N - 1]);
|
||||||
|
for(k=N-2;k>(-1);k--){
|
||||||
|
cblas_dsymv(CblasRowMajor, CblasLower,
|
||||||
|
N-k-1, -1,
|
||||||
|
&dL[(N*(k+1) + k+1)],N,
|
||||||
|
&L[k*N+k+1],1,
|
||||||
|
1, &dL[N*(k+1)+k], N);
|
||||||
|
for(i=0;i<(N-k-1); i++){
|
||||||
|
dL[N*(k+1+i)+k] -= dL[N*(k+1)+k+i*(N+1)+1] * L[k*N+k+1+i];
|
||||||
|
}
|
||||||
|
|
||||||
|
cblas_dscal(N-k-1, 1.0/L[k*N+k], &dL[(k+1)*N+k], N);
|
||||||
|
dL[k*N + k] -= cblas_ddot(N-k-1, &dL[(k+1)*N+k], N, &L[k*N+k+1], 1);
|
||||||
|
dL[k*N + k] /= (2.0 * L[k*N + k]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double mydot(int n, double* a, int stride_a, double* b, int stride_b){
|
||||||
|
double ret = 0;
|
||||||
|
for(int i=0; i<n; i++){
|
||||||
|
ret += a[i*stride_a]*b[i*stride_b];
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
void old_chol_backprop(int N, double* dL, double* U){
|
||||||
|
//at the input to this fn, dL is df_dL. after this fn is complet, dL is df_dK
|
||||||
|
int iN, kN,i,j,k;
|
||||||
|
dL[N*N-1] /= (2. * U[N*N-1]);
|
||||||
|
for(k=N-2;k>(-1);k--){
|
||||||
|
kN = k*N;
|
||||||
|
#pragma omp parallel for private(i,iN)
|
||||||
|
for(i=k+1; i<N; i++){
|
||||||
|
iN = i*N;
|
||||||
|
dL[iN+k] -= mydot(i-k, &dL[iN+k+1], 1, &U[kN+k+1], 1);
|
||||||
|
dL[iN+k] -= mydot(N-i, &dL[iN+i], N, &U[kN+i], 1);
|
||||||
|
|
||||||
|
}
|
||||||
|
for(i=(k + 1); i<N; i++){
|
||||||
|
iN = i*N;
|
||||||
|
dL[iN + k] /= U[kN + k];
|
||||||
|
dL[kN + k] -= U[kN + i] * dL[iN + k];
|
||||||
|
}
|
||||||
|
dL[kN + k] /= (2. * U[kN + k]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
5
GPy/util/cholesky_backprop.h
Normal file
5
GPy/util/cholesky_backprop.h
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
#include <cblas.h>
|
||||||
|
|
||||||
|
void dsymv(int N, double*A, double*b, double*y);
|
||||||
|
double mydot(int n, double* a, int stride_a, double* b, int stride_b);
|
||||||
|
void chol_backprop(int N, double* dL, double* L);
|
||||||
|
|
@ -15,7 +15,7 @@ import warnings
|
||||||
import os
|
import os
|
||||||
from .config import config
|
from .config import config
|
||||||
import logging
|
import logging
|
||||||
import linalg_cython
|
from . import linalg_cython
|
||||||
|
|
||||||
|
|
||||||
_scipyversion = np.float64((scipy.__version__).split('.')[:2])
|
_scipyversion = np.float64((scipy.__version__).split('.')[:2])
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@ Continuous integration status: ,
|
extra_link_args = ['-lgomp']),
|
||||||
Extension(name='GPy.util.choleskies_cython',
|
Extension(name='GPy.util.choleskies_cython',
|
||||||
sources=['GPy/util/choleskies_cython.c'],
|
sources=['GPy/util/choleskies_cython.c', 'GPy/util/cholesky_backprop.c'],
|
||||||
include_dirs=[np.get_include()],
|
include_dirs=[np.get_include()],
|
||||||
extra_compile_args=compile_flags),
|
extra_link_args = ['-lgomp', '-lblas'],
|
||||||
|
extra_compile_args=compile_flags+['-std=c99']),
|
||||||
Extension(name='GPy.util.linalg_cython',
|
Extension(name='GPy.util.linalg_cython',
|
||||||
sources=['GPy/util/linalg_cython.c'],
|
sources=['GPy/util/linalg_cython.c'],
|
||||||
include_dirs=[np.get_include()],
|
include_dirs=[np.get_include()],
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue