mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-02 14:45:15 +02:00
manual merging with AS
This commit is contained in:
commit
e88b8a88d1
16 changed files with 724 additions and 91 deletions
|
|
@ -6,14 +6,13 @@ import sys
|
|||
from .. import kern
|
||||
from .model import Model
|
||||
from .parameterization import ObsAr
|
||||
from .model import Model
|
||||
from .mapping import Mapping
|
||||
from .parameterization import ObsAr
|
||||
from .. import likelihoods
|
||||
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
|
||||
from .parameterization.variational import VariationalPosterior
|
||||
|
||||
import logging
|
||||
import warnings
|
||||
from GPy.util.normalizer import MeanNorm
|
||||
logger = logging.getLogger("GP")
|
||||
|
||||
|
|
@ -65,10 +64,14 @@ class GP(Model):
|
|||
self.Y = ObsAr(Y)
|
||||
self.Y_normalized = self.Y
|
||||
|
||||
assert Y.shape[0] == self.num_data
|
||||
if Y.shape[0] != self.num_data:
|
||||
#There can be cases where we want inputs than outputs, for example if we have multiple latent
|
||||
#function values
|
||||
warnings.warn("There are more rows in your input data X, \
|
||||
than in your output data Y, be VERY sure this is what you want")
|
||||
_, self.output_dim = self.Y.shape
|
||||
|
||||
#TODO: check the type of this is okay?
|
||||
assert ((Y_metadata is None) or isinstance(Y_metadata, dict))
|
||||
self.Y_metadata = Y_metadata
|
||||
|
||||
assert isinstance(kernel, kern.Kern)
|
||||
|
|
@ -296,7 +299,7 @@ class GP(Model):
|
|||
:type size: int.
|
||||
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
|
||||
:type full_cov: bool.
|
||||
:returns: Ysim: set of simulations
|
||||
:returns: fsim: set of simulations
|
||||
:rtype: np.ndarray (N x samples)
|
||||
"""
|
||||
m, v = self._raw_predict(X, full_cov=full_cov)
|
||||
|
|
@ -304,11 +307,11 @@ class GP(Model):
|
|||
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
|
||||
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
|
||||
if not full_cov:
|
||||
Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
|
||||
fsim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
|
||||
else:
|
||||
Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
|
||||
fsim = np.random.multivariate_normal(m.flatten(), v, size).T
|
||||
|
||||
return Ysim
|
||||
return fsim
|
||||
|
||||
def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -324,16 +327,16 @@ class GP(Model):
|
|||
:type noise_model: integer.
|
||||
:returns: Ysim: set of simulations, a Numpy array (N x samples).
|
||||
"""
|
||||
Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
|
||||
Ysim = self.likelihood.samples(Ysim, Y_metadata)
|
||||
|
||||
fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
|
||||
Ysim = self.likelihood.samples(fsim, Y_metadata)
|
||||
return Ysim
|
||||
|
||||
def plot_f(self, plot_limits=None, which_data_rows='all',
|
||||
which_data_ycols='all', fixed_inputs=[],
|
||||
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||
plot_raw=True,
|
||||
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
|
||||
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx',
|
||||
apply_link=False):
|
||||
"""
|
||||
Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
|
||||
This is a call to plot with plot_raw=True.
|
||||
|
|
@ -370,6 +373,8 @@ class GP(Model):
|
|||
:type Y_metadata: dict
|
||||
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
|
||||
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
|
||||
:param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*
|
||||
:type apply_link: boolean
|
||||
"""
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import models_plots
|
||||
|
|
@ -382,7 +387,7 @@ class GP(Model):
|
|||
which_data_ycols, fixed_inputs,
|
||||
levels, samples, fignum, ax, resolution,
|
||||
plot_raw=plot_raw, Y_metadata=Y_metadata,
|
||||
data_symbol=data_symbol, **kw)
|
||||
data_symbol=data_symbol, apply_link=apply_link, **kw)
|
||||
|
||||
def plot(self, plot_limits=None, which_data_rows='all',
|
||||
which_data_ycols='all', fixed_inputs=[],
|
||||
|
|
|
|||
|
|
@ -256,7 +256,7 @@ class Model(Parameterized):
|
|||
else:
|
||||
optimizer = optimization.get_optimizer(optimizer)
|
||||
opt = optimizer(start, model=self, max_iters=max_iters, **kwargs)
|
||||
|
||||
|
||||
with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook) as vo:
|
||||
opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
|
||||
vo.finish(opt)
|
||||
|
|
@ -371,7 +371,12 @@ class Model(Parameterized):
|
|||
f1 = self._objective(xx)
|
||||
xx[xind] -= 2.*step
|
||||
f2 = self._objective(xx)
|
||||
df_ratio = np.abs((f1 - f2) / min(f1, f2))
|
||||
#Avoid divide by zero, if any of the values are above 1e-15, otherwise both values are essentiall
|
||||
#the same
|
||||
if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
|
||||
df_ratio = np.abs((f1 - f2) / min(f1, f2))
|
||||
else:
|
||||
df_ratio = 1.0
|
||||
df_unstable = df_ratio < df_tolerance
|
||||
numerical_gradient = (f1 - f2) / (2 * step)
|
||||
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
|
||||
|
|
|
|||
|
|
@ -64,3 +64,17 @@ class ExactGaussianInference(LatentFunctionInference):
|
|||
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
|
||||
|
||||
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
|
||||
|
||||
def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None):
|
||||
"""
|
||||
Leave one out error as found in
|
||||
"Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
|
||||
Vehtari et al. 2014.
|
||||
"""
|
||||
g = posterior.woodbury_vector
|
||||
c = posterior.woodbury_inv
|
||||
c_diag = np.diag(c)[:, None]
|
||||
neg_log_marginal_LOO = 0.5*np.log(2*np.pi) - 0.5*np.log(c_diag) + 0.5*(g**2)/c_diag
|
||||
#believe from Predictive Approaches for Choosing Hyperparameters in Gaussian Processes
|
||||
#this is the negative marginal LOO
|
||||
return -neg_log_marginal_LOO
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ def warning_on_one_line(message, category, filename, lineno, file=None, line=Non
|
|||
warnings.formatwarning = warning_on_one_line
|
||||
from scipy import optimize
|
||||
from . import LatentFunctionInference
|
||||
from scipy.integrate import quad
|
||||
|
||||
class Laplace(LatentFunctionInference):
|
||||
|
||||
|
|
@ -39,6 +40,85 @@ class Laplace(LatentFunctionInference):
|
|||
self.first_run = True
|
||||
self._previous_Ki_fhat = None
|
||||
|
||||
def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None, f_hat=None, W=None, Ki_W_i=None):
|
||||
"""
|
||||
Leave one out log predictive density as found in
|
||||
"Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
|
||||
Vehtari et al. 2014.
|
||||
"""
|
||||
Ki_f_init = np.zeros_like(Y)
|
||||
|
||||
if K is None:
|
||||
K = kern.K(X)
|
||||
|
||||
if f_hat is None:
|
||||
f_hat, _ = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
|
||||
|
||||
if W is None:
|
||||
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
|
||||
|
||||
if Ki_W_i is None:
|
||||
_, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
|
||||
logpdf_dfhat = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
|
||||
|
||||
if W.shape[1] == 1:
|
||||
W = np.diagflat(W)
|
||||
|
||||
#Eq 14, and 16
|
||||
var_site = 1./np.diag(W)[:, None]
|
||||
mu_site = f_hat + var_site*logpdf_dfhat
|
||||
prec_site = 1./var_site
|
||||
#Eq 19
|
||||
marginal_cov = Ki_W_i
|
||||
marginal_mu = marginal_cov.dot(np.diagflat(prec_site)).dot(mu_site)
|
||||
marginal_var = np.diag(marginal_cov)[:, None]
|
||||
#Eq 30 with using site parameters instead of Gaussian site parameters
|
||||
#(var_site instead of sigma^{2} )
|
||||
posterior_cav_var = 1./(1./marginal_var - 1./var_site)
|
||||
posterior_cav_mean = posterior_cav_var*((1./marginal_var)*marginal_mu - (1./var_site)*Y)
|
||||
|
||||
flat_y = Y.flatten()
|
||||
flat_mu = posterior_cav_mean.flatten()
|
||||
flat_var = posterior_cav_var.flatten()
|
||||
|
||||
if Y_metadata is not None:
|
||||
#Need to zip individual elements of Y_metadata aswell
|
||||
Y_metadata_flat = {}
|
||||
if Y_metadata is not None:
|
||||
for key, val in Y_metadata.items():
|
||||
Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1, 1)
|
||||
|
||||
zipped_values = []
|
||||
|
||||
for i in range(Y.shape[0]):
|
||||
y_m = {}
|
||||
for key, val in Y_metadata_flat.items():
|
||||
if np.isscalar(val) or val.shape[0] == 1:
|
||||
y_m[key] = val
|
||||
else:
|
||||
#Won't broadcast yet
|
||||
y_m[key] = val[i]
|
||||
zipped_values.append((flat_y[i], flat_mu[i], flat_var[i], y_m))
|
||||
else:
|
||||
#Otherwise just pass along None's
|
||||
zipped_values = zip(flat_y, flat_mu, flat_var, [None]*Y.shape[0])
|
||||
|
||||
def integral_generator(yi, mi, vi, yi_m):
|
||||
def f(fi_star):
|
||||
#More stable in the log space
|
||||
p_fi = np.exp(likelihood.logpdf(fi_star, yi, yi_m)
|
||||
- 0.5*np.log(2*np.pi*vi)
|
||||
- 0.5*np.square(mi-fi_star)/vi)
|
||||
return p_fi
|
||||
return f
|
||||
|
||||
#Eq 30
|
||||
p_ystar, _ = zip(*[quad(integral_generator(y, m, v, yi_m), -np.inf, np.inf)
|
||||
for y, m, v, yi_m in zipped_values])
|
||||
p_ystar = np.array(p_ystar).reshape(-1, 1)
|
||||
return np.log(p_ystar)
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
|
||||
"""
|
||||
Returns a Posterior class containing essential quantities of the posterior
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import itertools
|
|||
|
||||
def index_to_slices(index):
|
||||
"""
|
||||
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
|
||||
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
|
||||
|
||||
e.g.
|
||||
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
|
||||
|
|
@ -79,10 +79,10 @@ class IndependentOutputs(CombinationKernel):
|
|||
|
||||
def update_gradients_full(self,dL_dK,X,X2=None):
|
||||
slices = index_to_slices(X[:,self.index_dim])
|
||||
if self.single_kern:
|
||||
if self.single_kern:
|
||||
target = np.zeros(self.kern.size)
|
||||
kerns = itertools.repeat(self.kern)
|
||||
else:
|
||||
else:
|
||||
kerns = self.kern
|
||||
target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
|
||||
def collate_grads(kern, i, dL, X, X2):
|
||||
|
|
@ -94,7 +94,7 @@ class IndependentOutputs(CombinationKernel):
|
|||
else:
|
||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
|
||||
if self.single_kern:
|
||||
if self.single_kern:
|
||||
self.kern.gradient = target
|
||||
else:
|
||||
[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||
|
|
@ -104,12 +104,14 @@ class IndependentOutputs(CombinationKernel):
|
|||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
if X2 is None:
|
||||
# TODO: make use of index_to_slices
|
||||
# FIXME: Broken as X is already sliced out
|
||||
print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
|
||||
values = np.unique(X[:,self.index_dim])
|
||||
slices = [X[:,self.index_dim]==i for i in values]
|
||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
||||
for kern, s in zip(kerns, slices)]
|
||||
#slices = index_to_slices(X[:,self.index_dim])
|
||||
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
|
||||
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
|
||||
# for s in slices_i] for kern, slices_i in zip(kerns, slices)]
|
||||
#import ipdb;ipdb.set_trace()
|
||||
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
|
||||
|
|
|
|||
|
|
@ -132,10 +132,8 @@ class Gaussian(Likelihood):
|
|||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
N = y.shape[0]
|
||||
ln_det_cov = np.log(self.variance)
|
||||
|
||||
return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi))
|
||||
return -(1.0/(2*self.variance))*((y-link_f)**2) - 0.5*ln_det_cov - 0.5*np.log(2.*np.pi)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -220,7 +218,6 @@ class Gaussian(Likelihood):
|
|||
"""
|
||||
e = y - link_f
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
N = y.shape[0]
|
||||
dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e)
|
||||
return dlik_dsigma
|
||||
|
||||
|
|
|
|||
|
|
@ -114,21 +114,24 @@ class Likelihood(Parameterized):
|
|||
#Otherwise just pass along None's
|
||||
zipped_values = zip(flat_y_test, flat_mu_star, flat_var_star, [None]*y_test.shape[0])
|
||||
|
||||
def integral_generator(y, m, v, y_m):
|
||||
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
|
||||
def f(f_star):
|
||||
def integral_generator(yi, mi, vi, yi_m):
|
||||
"""Generate a function which can be integrated
|
||||
to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
|
||||
def f(fi_star):
|
||||
#exponent = np.exp(-(1./(2*v))*np.square(m-f_star))
|
||||
#from GPy.util.misc import safe_exp
|
||||
#exponent = safe_exp(exponent)
|
||||
#return self.pdf(f_star, y, y_m)*exponent
|
||||
|
||||
#More stable in the log space
|
||||
return np.exp(self.logpdf(f_star, y, y_m) -(1./(2*v))*np.square(m-f_star))
|
||||
return np.exp(self.logpdf(fi_star, yi, yi_m)
|
||||
- 0.5*np.log(2*np.pi*vi)
|
||||
- 0.5*np.square(mi-fi_star)/vi)
|
||||
return f
|
||||
|
||||
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v, y_m), -np.inf, np.inf) for y, m, v, y_m in zipped_values])
|
||||
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
|
||||
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
|
||||
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
|
||||
for yi, mi, vi, yi_m in zipped_values])
|
||||
p_ystar = np.array(p_ystar).reshape(-1, 1)
|
||||
return np.log(p_ystar)
|
||||
|
||||
def _moments_match_ep(self,obs,tau,v):
|
||||
|
|
@ -545,9 +548,9 @@ class Likelihood(Parameterized):
|
|||
|
||||
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
||||
# ensure we have gradients for every parameter we want to optimize
|
||||
assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array
|
||||
assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param
|
||||
assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param
|
||||
assert dlogpdf_dtheta.shape[0] == self.size #num_param array x f, d
|
||||
assert dlogpdf_df_dtheta.shape[0] == self.size #num_param x f x d x matrix or just num_param x f
|
||||
assert d2logpdf_df2_dtheta.shape[0] == self.size #num_param x f matrix or num_param x f x d x matrix, num_param x f x f or num_param x f x f x d
|
||||
|
||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||
|
||||
|
|
|
|||
|
|
@ -4,9 +4,7 @@
|
|||
import numpy as np
|
||||
from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
|
||||
import scipy as sp
|
||||
|
||||
_exp_lim_val = np.finfo(np.float64).max
|
||||
_lim_val = np.log(_exp_lim_val)
|
||||
from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
|
||||
|
||||
class GPTransformation(object):
|
||||
"""
|
||||
|
|
@ -63,12 +61,13 @@ class Identity(GPTransformation):
|
|||
def d3transf_df3(self,f):
|
||||
return np.zeros_like(f)
|
||||
|
||||
|
||||
class Probit(GPTransformation):
|
||||
"""
|
||||
.. math::
|
||||
|
||||
g(f) = \\Phi^{-1} (mu)
|
||||
|
||||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return std_norm_cdf(f)
|
||||
|
|
@ -80,7 +79,7 @@ class Probit(GPTransformation):
|
|||
return -f * std_norm_pdf(f)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
return (np.square(f)-1.)*std_norm_pdf(f)
|
||||
return (safe_square(f)-1.)*std_norm_pdf(f)
|
||||
|
||||
|
||||
class Cloglog(GPTransformation):
|
||||
|
|
@ -96,19 +95,23 @@ class Cloglog(GPTransformation):
|
|||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return 1-np.exp(-np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
return 1-np.exp(-ef)
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return np.exp(f-np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
return np.exp(f-ef)
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
ef = np.exp(f)
|
||||
ef = safe_exp(f)
|
||||
return -np.exp(f-ef)*(ef-1.)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
ef = np.exp(f)
|
||||
return np.exp(f-ef)*(1.-3*ef + ef**2)
|
||||
|
||||
ef = safe_exp(f)
|
||||
ef2 = safe_square(ef)
|
||||
three_times_ef = safe_three_times(ef)
|
||||
r_val = np.exp(f-ef)*(1.-three_times_ef + ef2)
|
||||
return r_val
|
||||
|
||||
class Log(GPTransformation):
|
||||
"""
|
||||
|
|
@ -118,16 +121,16 @@ class Log(GPTransformation):
|
|||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return np.exp(np.clip(f, -np.inf, _lim_val))
|
||||
return safe_exp(f)
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return np.exp(np.clip(f, -np.inf, _lim_val))
|
||||
return safe_exp(f)
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
return np.exp(np.clip(f, -np.inf, _lim_val))
|
||||
return safe_exp(f)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
return np.exp(np.clip(f, -np.inf, _lim_val))
|
||||
return safe_exp(f)
|
||||
|
||||
class Log_ex_1(GPTransformation):
|
||||
"""
|
||||
|
|
@ -137,17 +140,20 @@ class Log_ex_1(GPTransformation):
|
|||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return np.log(1.+np.exp(f))
|
||||
return np.log1p(safe_exp(f))
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return np.exp(f)/(1.+np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
return ef/(1.+ef)
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
aux = np.exp(f)/(1.+np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
aux = ef/(1.+ef)
|
||||
return aux*(1.-aux)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
aux = np.exp(f)/(1.+np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
aux = ef/(1.+ef)
|
||||
daux_df = aux*(1.-aux)
|
||||
return daux_df - (2.*aux*daux_df)
|
||||
|
||||
|
|
@ -155,14 +161,17 @@ class Reciprocal(GPTransformation):
|
|||
def transf(self,f):
|
||||
return 1./f
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return -1./(f**2)
|
||||
def dtransf_df(self, f):
|
||||
f2 = safe_square(f)
|
||||
return -1./f2
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
return 2./(f**3)
|
||||
def d2transf_df2(self, f):
|
||||
f3 = safe_cube(f)
|
||||
return 2./f3
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
return -6./(f**4)
|
||||
f4 = safe_quad(f)
|
||||
return -6./f4
|
||||
|
||||
class Heaviside(GPTransformation):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from .sparse_gplvm import SparseGPLVM
|
|||
from .warped_gp import WarpedGP
|
||||
from .bayesian_gplvm import BayesianGPLVM
|
||||
from .mrd import MRD
|
||||
from .gradient_checker import GradientChecker
|
||||
from .gradient_checker import GradientChecker, HessianChecker, SkewChecker
|
||||
from .ss_gplvm import SSGPLVM
|
||||
from .gp_coregionalized_regression import GPCoregionalizedRegression
|
||||
from .sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class BayesianGPLVM(SparseGP_MPI):
|
|||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None,
|
||||
name='bayesian gplvm', mpi_comm=None, normalizer=None,
|
||||
missing_data=False, stochastic=False, batchsize=1):
|
||||
missing_data=False, stochastic=False, batchsize=1, Y_metadata=None):
|
||||
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
if X is None:
|
||||
|
|
@ -69,6 +69,7 @@ class BayesianGPLVM(SparseGP_MPI):
|
|||
name=name, inference_method=inference_method,
|
||||
normalizer=normalizer, mpi_comm=mpi_comm,
|
||||
variational_prior=self.variational_prior,
|
||||
Y_metadata=Y_metadata
|
||||
)
|
||||
self.link_parameter(self.X, index=0)
|
||||
|
||||
|
|
@ -83,7 +84,7 @@ class BayesianGPLVM(SparseGP_MPI):
|
|||
def parameters_changed(self):
|
||||
super(BayesianGPLVM,self).parameters_changed()
|
||||
if isinstance(self.inference_method, VarDTC_minibatch):
|
||||
return
|
||||
return
|
||||
|
||||
kl_fctr = 1.
|
||||
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ from ..core.model import Model
|
|||
import itertools
|
||||
import numpy
|
||||
from ..core.parameterization import Param
|
||||
np = numpy
|
||||
from ..util.block_matrices import get_blocks, get_block_shapes, unblock, get_blocks_3d, get_block_shapes_3d
|
||||
|
||||
def get_shape(x):
|
||||
if isinstance(x, numpy.ndarray):
|
||||
|
|
@ -111,3 +113,261 @@ class GradientChecker(Model):
|
|||
#for name, shape in zip(self.names, self.shapes):
|
||||
#_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
|
||||
#return _param_names
|
||||
|
||||
|
||||
class HessianChecker(GradientChecker):
|
||||
|
||||
def __init__(self, f, df, ddf, x0, names=None, *args, **kwargs):
|
||||
"""
|
||||
:param f: Function (only used for numerical hessian gradient)
|
||||
:param df: Gradient of function to check
|
||||
:param ddf: Analytical gradient function
|
||||
:param x0:
|
||||
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
|
||||
Can be a list of arrays, if takes a list of arrays. This list will be passed
|
||||
to f and df in the same order as given here.
|
||||
If only one argument, make sure not to pass a list!!!
|
||||
|
||||
:type x0: [array-like] | array-like | float | int
|
||||
:param names:
|
||||
Names to print, when performing gradcheck. If a list was passed to x0
|
||||
a list of names with the same length is expected.
|
||||
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
|
||||
|
||||
"""
|
||||
super(HessianChecker, self).__init__(df, ddf, x0, names=names, *args, **kwargs)
|
||||
self._f = f
|
||||
self._df = df
|
||||
self._ddf = ddf
|
||||
|
||||
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
|
||||
"""
|
||||
Overwrite checkgrad method to check whole block instead of looping through
|
||||
|
||||
Shows diagnostics using matshow instead
|
||||
|
||||
:param verbose: If True, print a "full" checking of each parameter
|
||||
:type verbose: bool
|
||||
:param step: The size of the step around which to linearise the objective
|
||||
:type step: float (default 1e-6)
|
||||
:param tolerance: the tolerance allowed (see note)
|
||||
:type tolerance: float (default 1e-3)
|
||||
|
||||
Note:-
|
||||
The gradient is considered correct if the ratio of the analytical
|
||||
and numerical gradients is within <tolerance> of unity.
|
||||
"""
|
||||
try:
|
||||
import numdifftools as nd
|
||||
except:
|
||||
raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
|
||||
|
||||
if target_param:
|
||||
raise NotImplementedError('Only basic functionality is provided with this gradchecker')
|
||||
|
||||
#Repeat for each parameter, not the nicest but shouldn't be many cases where there are many
|
||||
#variables
|
||||
current_index = 0
|
||||
for name, shape in zip(self.names, self.shapes):
|
||||
current_size = numpy.prod(shape)
|
||||
x = self.optimizer_array.copy()
|
||||
#x = self._get_params_transformed().copy()
|
||||
x = x[current_index:current_index + current_size].reshape(shape)
|
||||
|
||||
# Check gradients
|
||||
analytic_hess = self._ddf(x)
|
||||
if analytic_hess.shape[1] == 1:
|
||||
analytic_hess = numpy.diagflat(analytic_hess)
|
||||
|
||||
#From the docs:
|
||||
#x0 : vector location
|
||||
#at which to differentiate fun
|
||||
#If x0 is an N x M array, then fun is assumed to be a function
|
||||
#of N*M variables., thus we must have it flat, not (N,1), but just (N,)
|
||||
#numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
|
||||
numeric_hess_partial = nd.Jacobian(self._df, vectorized=False)
|
||||
#numeric_hess_partial = nd.Derivative(self._df, vectorized=True)
|
||||
numeric_hess = numeric_hess_partial(x)
|
||||
|
||||
check_passed = self.checkgrad_block(analytic_hess, numeric_hess, verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=plot)
|
||||
current_index += current_size
|
||||
return check_passed
|
||||
|
||||
def checkgrad_block(self, analytic_hess, numeric_hess, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
|
||||
"""
|
||||
Checkgrad a block matrix
|
||||
"""
|
||||
if analytic_hess.dtype is np.dtype('object'):
|
||||
#Make numeric hessian also into a block matrix
|
||||
real_size = get_block_shapes(analytic_hess)
|
||||
num_elements = np.sum(real_size)
|
||||
if (num_elements, num_elements) == numeric_hess.shape:
|
||||
#If the sizes are the same we assume they are the same
|
||||
#(we have not fixed any values so the numeric is the whole hessian)
|
||||
numeric_hess = get_blocks(numeric_hess, real_size)
|
||||
else:
|
||||
#Make a fake empty matrix and fill out the correct block
|
||||
tmp_numeric_hess = get_blocks(np.zeros((num_elements, num_elements)), real_size)
|
||||
tmp_numeric_hess[block_indices] = numeric_hess.copy()
|
||||
numeric_hess = tmp_numeric_hess
|
||||
|
||||
if block_indices is not None:
|
||||
#Extract the right block
|
||||
analytic_hess = analytic_hess[block_indices]
|
||||
numeric_hess = numeric_hess[block_indices]
|
||||
else:
|
||||
#Unblock them if they are in blocks and you aren't checking a single block (checking whole hessian)
|
||||
if analytic_hess.dtype is np.dtype('object'):
|
||||
analytic_hess = unblock(analytic_hess)
|
||||
numeric_hess = unblock(numeric_hess)
|
||||
|
||||
ratio = numeric_hess / (numpy.where(analytic_hess==0, 1e-10, analytic_hess))
|
||||
difference = numpy.abs(analytic_hess - numeric_hess)
|
||||
|
||||
check_passed = numpy.all((numpy.abs(1 - ratio)) < tolerance) or numpy.allclose(numeric_hess, analytic_hess, atol = tolerance)
|
||||
|
||||
if verbose:
|
||||
if block_indices:
|
||||
print "\nBlock {}".format(block_indices)
|
||||
else:
|
||||
print "\nAll blocks"
|
||||
|
||||
header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
|
||||
header_string = map(lambda x: ' | '.join(header), [header])
|
||||
separator = '-' * len(header_string[0])
|
||||
print '\n'.join([header_string[0], separator])
|
||||
min_r = '%.6f' % float(numpy.min(ratio))
|
||||
max_r = '%.6f' % float(numpy.max(ratio))
|
||||
max_d = '%.6f' % float(numpy.max(difference))
|
||||
min_d = '%.6f' % float(numpy.min(difference))
|
||||
cols = [max_r, min_r, min_d, max_d]
|
||||
|
||||
if check_passed:
|
||||
checked = "\033[92m True \033[0m"
|
||||
else:
|
||||
checked = "\033[91m False \033[0m"
|
||||
|
||||
grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
|
||||
print grad_string
|
||||
|
||||
if plot:
|
||||
import pylab as pb
|
||||
fig, axes = pb.subplots(2, 2)
|
||||
max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
|
||||
min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))
|
||||
msa = axes[0,0].matshow(analytic_hess, vmin=min_lim, vmax=max_lim)
|
||||
axes[0,0].set_title('Analytic hessian')
|
||||
axes[0,0].xaxis.set_ticklabels([None])
|
||||
axes[0,0].yaxis.set_ticklabels([None])
|
||||
axes[0,0].xaxis.set_ticks([None])
|
||||
axes[0,0].yaxis.set_ticks([None])
|
||||
msn = axes[0,1].matshow(numeric_hess, vmin=min_lim, vmax=max_lim)
|
||||
pb.colorbar(msn, ax=axes[0,1])
|
||||
axes[0,1].set_title('Numeric hessian')
|
||||
axes[0,1].xaxis.set_ticklabels([None])
|
||||
axes[0,1].yaxis.set_ticklabels([None])
|
||||
axes[0,1].xaxis.set_ticks([None])
|
||||
axes[0,1].yaxis.set_ticks([None])
|
||||
msr = axes[1,0].matshow(ratio)
|
||||
pb.colorbar(msr, ax=axes[1,0])
|
||||
axes[1,0].set_title('Ratio')
|
||||
axes[1,0].xaxis.set_ticklabels([None])
|
||||
axes[1,0].yaxis.set_ticklabels([None])
|
||||
axes[1,0].xaxis.set_ticks([None])
|
||||
axes[1,0].yaxis.set_ticks([None])
|
||||
msd = axes[1,1].matshow(difference)
|
||||
pb.colorbar(msd, ax=axes[1,1])
|
||||
axes[1,1].set_title('difference')
|
||||
axes[1,1].xaxis.set_ticklabels([None])
|
||||
axes[1,1].yaxis.set_ticklabels([None])
|
||||
axes[1,1].xaxis.set_ticks([None])
|
||||
axes[1,1].yaxis.set_ticks([None])
|
||||
if block_indices:
|
||||
fig.suptitle("Block: {}".format(block_indices))
|
||||
pb.show()
|
||||
|
||||
return check_passed
|
||||
|
||||
class SkewChecker(HessianChecker):
|
||||
|
||||
def __init__(self, df, ddf, dddf, x0, names=None, *args, **kwargs):
|
||||
"""
|
||||
:param df: gradient of function
|
||||
:param ddf: Gradient of function to check (hessian)
|
||||
:param dddf: Analytical gradient function (third derivative)
|
||||
:param x0:
|
||||
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
|
||||
Can be a list of arrays, if takes a list of arrays. This list will be passed
|
||||
to f and df in the same order as given here.
|
||||
If only one argument, make sure not to pass a list!!!
|
||||
|
||||
:type x0: [array-like] | array-like | float | int
|
||||
:param names:
|
||||
Names to print, when performing gradcheck. If a list was passed to x0
|
||||
a list of names with the same length is expected.
|
||||
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
|
||||
|
||||
"""
|
||||
super(SkewChecker, self).__init__(df, ddf, dddf, x0, names=names, *args, **kwargs)
|
||||
|
||||
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False, super_plot=False):
|
||||
"""
|
||||
Gradient checker that just checks each hessian individually
|
||||
|
||||
super_plot will plot the hessian wrt every parameter, plot will just do the first one
|
||||
"""
|
||||
try:
|
||||
import numdifftools as nd
|
||||
except:
|
||||
raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
|
||||
|
||||
if target_param:
|
||||
raise NotImplementedError('Only basic functionality is provided with this gradchecker')
|
||||
|
||||
#Repeat for each parameter, not the nicest but shouldn't be many cases where there are many
|
||||
#variables
|
||||
current_index = 0
|
||||
for name, n_shape in zip(self.names, self.shapes):
|
||||
current_size = numpy.prod(n_shape)
|
||||
x = self.optimizer_array.copy()
|
||||
#x = self._get_params_transformed().copy()
|
||||
x = x[current_index:current_index + current_size].reshape(n_shape)
|
||||
|
||||
# Check gradients
|
||||
#Actually the third derivative
|
||||
analytic_hess = self._ddf(x)
|
||||
|
||||
#Can only calculate jacobian for one variable at a time
|
||||
#From the docs:
|
||||
#x0 : vector location
|
||||
#at which to differentiate fun
|
||||
#If x0 is an N x M array, then fun is assumed to be a function
|
||||
#of N*M variables., thus we must have it flat, not (N,1), but just (N,)
|
||||
#numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
|
||||
#Actually _df is already the hessian
|
||||
numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
|
||||
numeric_hess = numeric_hess_partial(x)
|
||||
|
||||
print "Done making numerical hessian"
|
||||
if analytic_hess.dtype is np.dtype('object'):
|
||||
#Blockify numeric_hess aswell
|
||||
blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
|
||||
#HACK
|
||||
real_block_size = np.sum(blocksizes)
|
||||
numeric_hess = numeric_hess.reshape(real_block_size, real_block_size, pagesizes)
|
||||
#numeric_hess = get_blocks_3d(numeric_hess, blocksizes)#, pagesizes)
|
||||
else:
|
||||
numeric_hess = numeric_hess.reshape(*analytic_hess.shape)
|
||||
|
||||
#Check every block individually (for ease)
|
||||
check_passed = [False]*numeric_hess.shape[2]
|
||||
for block_ind in xrange(numeric_hess.shape[2]):
|
||||
#Unless super_plot is set, just plot the first one
|
||||
p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
|
||||
if verbose:
|
||||
print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
|
||||
check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
|
||||
|
||||
current_index += current_size
|
||||
return np.all(check_passed)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
try:
|
||||
|
|
@ -16,7 +16,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
which_data_ycols='all', fixed_inputs=[],
|
||||
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||
plot_raw=False,
|
||||
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx'):
|
||||
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
|
||||
apply_link=False, samples_f=0, plot_uncertain_inputs=True):
|
||||
"""
|
||||
Plot the posterior of the GP.
|
||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||
|
|
@ -38,7 +39,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
:type resolution: int
|
||||
:param levels: number of levels to plot in a contour plot.
|
||||
:type levels: int
|
||||
:param samples: the number of a posteriori samples to plot
|
||||
:param samples: the number of a posteriori samples to plot p(y*|y)
|
||||
:type samples: int
|
||||
:param fignum: figure to plot on.
|
||||
:type fignum: figure number
|
||||
|
|
@ -49,6 +50,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
:type linecol:
|
||||
:param fillcol: color of fill
|
||||
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
|
||||
:param apply_link: apply the link function if plotting f (default false)
|
||||
:type apply_link: boolean
|
||||
:param samples_f: the number of posteriori f samples to plot p(f*|y)
|
||||
:type samples_f: int
|
||||
"""
|
||||
#deal with optional arguments
|
||||
if which_data_rows == 'all':
|
||||
|
|
@ -88,8 +93,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
#make a prediction on the frame and plot it
|
||||
if plot_raw:
|
||||
m, v = model._raw_predict(Xgrid)
|
||||
lower = m - 2*np.sqrt(v)
|
||||
upper = m + 2*np.sqrt(v)
|
||||
if apply_link:
|
||||
lower = model.likelihood.gp_link.transf(m - 2*np.sqrt(v))
|
||||
upper = model.likelihood.gp_link.transf(m + 2*np.sqrt(v))
|
||||
#Once transformed this is now the median of the function
|
||||
m = model.likelihood.gp_link.transf(m)
|
||||
else:
|
||||
lower = m - 2*np.sqrt(v)
|
||||
upper = m + 2*np.sqrt(v)
|
||||
else:
|
||||
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
|
||||
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
|
||||
|
|
@ -110,13 +121,31 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
||||
|
||||
if samples_f: #NOTE not tested with fixed_inputs
|
||||
Fsim = model.posterior_samples_f(Xgrid, samples_f)
|
||||
for fi in Fsim.T:
|
||||
plots['posterior_samples_f'] = ax.plot(Xnew, fi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
||||
|
||||
|
||||
#add error bars for uncertain (if input uncertainty is being modelled)
|
||||
if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
|
||||
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
|
||||
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
|
||||
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
||||
|
||||
if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs() and plot_uncertain_inputs:
|
||||
if plot_raw:
|
||||
#add error bars for uncertain (if input uncertainty is being modelled), for plot_f
|
||||
#Hack to plot error bars on latent function, rather than on the data
|
||||
vs = model.X.mean.values.copy()
|
||||
for i,v in fixed_inputs:
|
||||
vs[:,i] = v
|
||||
m_X, _ = model._raw_predict(vs)
|
||||
if apply_link:
|
||||
m_X = model.likelihood.gp_link.transf(m_X)
|
||||
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), m_X[which_data_rows, which_data_ycols].flatten(),
|
||||
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
|
||||
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
||||
else:
|
||||
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
|
||||
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
|
||||
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
||||
|
||||
#set the limits of the plot to some sensible values
|
||||
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
|
||||
|
|
@ -186,3 +215,29 @@ def plot_fit_f(model, *args, **kwargs):
|
|||
"""
|
||||
kwargs['plot_raw'] = True
|
||||
plot_fit(model,*args, **kwargs)
|
||||
|
||||
def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
|
||||
"""
|
||||
Convenience function for returning back fixed_inputs where the other inputs
|
||||
are fixed using fix_routine
|
||||
:param model: model
|
||||
:type model: Model
|
||||
:param non_fixed_inputs: dimensions of non fixed inputs
|
||||
:type non_fixed_inputs: list
|
||||
:param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
|
||||
:type fix_routine: string
|
||||
"""
|
||||
f_inputs = []
|
||||
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
|
||||
X = model.X.mean.values.copy()
|
||||
else:
|
||||
X = model.X.values.copy()
|
||||
for i in range(X.shape[1]):
|
||||
if i not in non_fixed_inputs:
|
||||
if fix_routine == 'mean':
|
||||
f_inputs.append( (i, np.mean(X[:,i])) )
|
||||
if fix_routine == 'median':
|
||||
f_inputs.append( (i, np.median(X[:,i])) )
|
||||
elif fix_routine == 'zero':
|
||||
f_inputs.append( (i, 0) )
|
||||
return f_inputs
|
||||
|
|
|
|||
143
GPy/testing/link_function_tests.py
Normal file
143
GPy/testing/link_function_tests.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
import numpy as np
|
||||
import scipy as sp
|
||||
from scipy.special import cbrt
|
||||
from GPy.models import GradientChecker
|
||||
_lim_val = np.finfo(np.float64).max
|
||||
_lim_val_exp = np.log(_lim_val)
|
||||
_lim_val_square = np.sqrt(_lim_val)
|
||||
_lim_val_cube = cbrt(_lim_val)
|
||||
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside
|
||||
|
||||
class LinkFunctionTests(np.testing.TestCase):
|
||||
def setUp(self):
|
||||
self.small_f = np.array([[-1e-4]])
|
||||
self.zero_f = np.array([[1e-4]])
|
||||
self.mid_f = np.array([[5.0]])
|
||||
self.large_f = np.array([[1e4]])
|
||||
self.f_lower_lim = np.array(-np.inf)
|
||||
self.f_upper_lim = np.array(np.inf)
|
||||
|
||||
def check_gradient(self, link_func, lim_of_inf, test_lim=False):
|
||||
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.mid_f)
|
||||
self.assertTrue(grad.checkgrad(verbose=True))
|
||||
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.mid_f)
|
||||
self.assertTrue(grad2.checkgrad(verbose=True))
|
||||
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.mid_f)
|
||||
self.assertTrue(grad3.checkgrad(verbose=True))
|
||||
|
||||
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.small_f)
|
||||
self.assertTrue(grad.checkgrad(verbose=True))
|
||||
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.small_f)
|
||||
self.assertTrue(grad2.checkgrad(verbose=True))
|
||||
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.small_f)
|
||||
self.assertTrue(grad3.checkgrad(verbose=True))
|
||||
|
||||
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.zero_f)
|
||||
self.assertTrue(grad.checkgrad(verbose=True))
|
||||
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.zero_f)
|
||||
self.assertTrue(grad2.checkgrad(verbose=True))
|
||||
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.zero_f)
|
||||
self.assertTrue(grad3.checkgrad(verbose=True))
|
||||
|
||||
#Do a limit test if the large f value is too large
|
||||
large_f = np.clip(self.large_f, -np.inf, lim_of_inf-1e-3)
|
||||
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=large_f)
|
||||
self.assertTrue(grad.checkgrad(verbose=True))
|
||||
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=large_f)
|
||||
self.assertTrue(grad2.checkgrad(verbose=True))
|
||||
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=large_f)
|
||||
self.assertTrue(grad3.checkgrad(verbose=True))
|
||||
|
||||
if test_lim:
|
||||
print "Testing limits"
|
||||
#Remove some otherwise we are too close to the limit for gradcheck to work effectively
|
||||
lim_of_inf = lim_of_inf - 1e-4
|
||||
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)
|
||||
self.assertTrue(grad.checkgrad(verbose=True))
|
||||
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=lim_of_inf)
|
||||
self.assertTrue(grad2.checkgrad(verbose=True))
|
||||
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=lim_of_inf)
|
||||
self.assertTrue(grad3.checkgrad(verbose=True))
|
||||
|
||||
def check_overflow(self, link_func, lim_of_inf):
|
||||
#Check that it does something sensible beyond this limit,
|
||||
#note this is not checking the value is correct, just that it isn't nan
|
||||
beyond_lim_of_inf = lim_of_inf + 100.0
|
||||
self.assertFalse(np.isinf(link_func.transf(beyond_lim_of_inf)))
|
||||
self.assertFalse(np.isinf(link_func.dtransf_df(beyond_lim_of_inf)))
|
||||
self.assertFalse(np.isinf(link_func.d2transf_df2(beyond_lim_of_inf)))
|
||||
|
||||
self.assertFalse(np.isnan(link_func.transf(beyond_lim_of_inf)))
|
||||
self.assertFalse(np.isnan(link_func.dtransf_df(beyond_lim_of_inf)))
|
||||
self.assertFalse(np.isnan(link_func.d2transf_df2(beyond_lim_of_inf)))
|
||||
|
||||
def test_log_overflow(self):
|
||||
link = Log()
|
||||
lim_of_inf = _lim_val_exp
|
||||
|
||||
np.testing.assert_almost_equal(np.exp(self.mid_f), link.transf(self.mid_f))
|
||||
assert np.isinf(np.exp(np.log(self.f_upper_lim)))
|
||||
#Check the clipping works
|
||||
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
|
||||
#Need to look at most significant figures here rather than the decimals
|
||||
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
|
||||
self.check_overflow(link, lim_of_inf)
|
||||
|
||||
#Check that it would otherwise fail
|
||||
beyond_lim_of_inf = lim_of_inf + 10.0
|
||||
old_err_state = np.seterr(over='ignore')
|
||||
self.assertTrue(np.isinf(np.exp(beyond_lim_of_inf)))
|
||||
np.seterr(**old_err_state)
|
||||
|
||||
def test_log_ex_1_overflow(self):
|
||||
link = Log_ex_1()
|
||||
lim_of_inf = _lim_val_exp
|
||||
|
||||
np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
|
||||
assert np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim))))
|
||||
#Check the clipping works
|
||||
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
|
||||
#Need to look at most significant figures here rather than the decimals
|
||||
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5)
|
||||
self.check_overflow(link, lim_of_inf)
|
||||
|
||||
#Check that it would otherwise fail
|
||||
beyond_lim_of_inf = lim_of_inf + 10.0
|
||||
old_err_state = np.seterr(over='ignore')
|
||||
self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf))))
|
||||
np.seterr(**old_err_state)
|
||||
|
||||
|
||||
def test_log_gradients(self):
|
||||
# transf dtransf_df d2transf_df2 d3transf_df3
|
||||
link = Log()
|
||||
lim_of_inf = _lim_val_exp
|
||||
self.check_gradient(link, lim_of_inf, test_lim=True)
|
||||
|
||||
def test_identity_gradients(self):
|
||||
link = Identity()
|
||||
lim_of_inf = _lim_val
|
||||
#FIXME: Should be able to think of a way to test the limits of this
|
||||
self.check_gradient(link, lim_of_inf, test_lim=False)
|
||||
|
||||
def test_probit_gradients(self):
|
||||
link = Probit()
|
||||
lim_of_inf = _lim_val
|
||||
self.check_gradient(link, lim_of_inf, test_lim=True)
|
||||
|
||||
def test_Cloglog_gradients(self):
|
||||
link = Cloglog()
|
||||
lim_of_inf = _lim_val_exp
|
||||
self.check_gradient(link, lim_of_inf, test_lim=True)
|
||||
|
||||
def test_Log_ex_1_gradients(self):
|
||||
link = Log_ex_1()
|
||||
lim_of_inf = _lim_val_exp
|
||||
self.check_gradient(link, lim_of_inf, test_lim=True)
|
||||
self.check_overflow(link, lim_of_inf)
|
||||
|
||||
def test_reciprocal_gradients(self):
|
||||
link = Reciprocal()
|
||||
lim_of_inf = _lim_val
|
||||
#Does not work with much smaller values, and values closer to zero than 1e-5
|
||||
self.check_gradient(link, lim_of_inf, test_lim=True)
|
||||
|
|
@ -1,9 +1,37 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2014-2015, Alan Saul
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
import numpy as np
|
||||
|
||||
def get_blocks_3d(A, blocksizes, pagesizes=None):
|
||||
"""
|
||||
Given a 3d matrix, make a block matrix, where the first and second dimensions are blocked according
|
||||
to blocksizes, and the pages are blocked using pagesizes
|
||||
"""
|
||||
assert (A.shape[0]==A.shape[1]) and len(A.shape)==3, "can't blockify this non-square matrix, may need to use 2d version"
|
||||
N = np.sum(blocksizes)
|
||||
assert A.shape[0] == N, "bad blocksizes"
|
||||
num_blocks = len(blocksizes)
|
||||
if pagesizes == None:
|
||||
#Assume each page of A should be its own dimension
|
||||
pagesizes = range(A.shape[2])#[0]*A.shape[2]
|
||||
num_pages = len(pagesizes)
|
||||
B = np.empty(shape=(num_blocks, num_blocks, num_pages), dtype=np.object)
|
||||
count_k = 0
|
||||
#for Bk, k in enumerate(pagesizes):
|
||||
for Bk in pagesizes:
|
||||
count_i = 0
|
||||
for Bi, i in enumerate(blocksizes):
|
||||
count_j = 0
|
||||
for Bj, j in enumerate(blocksizes):
|
||||
#We want to have it count_k:count_k + k but its annoying as it makes a NxNx1 array is page sizes are set to 1
|
||||
B[Bi, Bj, Bk] = A[count_i:count_i + i, count_j:count_j + j, Bk]
|
||||
count_j += j
|
||||
count_i += i
|
||||
#count_k += k
|
||||
return B
|
||||
|
||||
def get_blocks(A, blocksizes):
|
||||
assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can;t blockify this non-square matrix"
|
||||
assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can't blockify this non-square matrix"
|
||||
N = np.sum(blocksizes)
|
||||
assert A.shape[0] == N, "bad blocksizes"
|
||||
num_blocks = len(blocksizes)
|
||||
|
|
@ -17,6 +45,11 @@ def get_blocks(A, blocksizes):
|
|||
count_i += i
|
||||
return B
|
||||
|
||||
def get_block_shapes_3d(B):
|
||||
assert B.dtype is np.dtype('object'), "Must be a block matrix"
|
||||
#FIXME: This isn't general AT ALL...
|
||||
return get_block_shapes(B[:,:,0]), B.shape[2]
|
||||
|
||||
def get_block_shapes(B):
|
||||
assert B.dtype is np.dtype('object'), "Must be a block matrix"
|
||||
return [B[b,b].shape[0] for b in range(0, B.shape[0])]
|
||||
|
|
@ -35,7 +68,7 @@ def unblock(B):
|
|||
count_i += i
|
||||
return A
|
||||
|
||||
def block_dot(A, B):
|
||||
def block_dot(A, B, diagonal=False):
|
||||
"""
|
||||
Element wise dot product on block matricies
|
||||
|
||||
|
|
@ -48,21 +81,30 @@ def block_dot(A, B):
|
|||
+-------------+ +------+------+ +-------+-------+
|
||||
|
||||
..Note
|
||||
If any block of either (A or B) are stored as 1d vectors then we assume
|
||||
that it denotes a diagonal matrix efficient dot product using numpy
|
||||
broadcasting will be used, i.e. A11*B11
|
||||
|
||||
If either (A or B) of the diagonal matrices are stored as vectors then a more
|
||||
efficient dot product using numpy broadcasting will be used, i.e. A11*B11
|
||||
"""
|
||||
#Must have same number of blocks and be a block matrix
|
||||
assert A.dtype is np.dtype('object'), "Must be a block matrix"
|
||||
assert B.dtype is np.dtype('object'), "Must be a block matrix"
|
||||
Ashape = A.shape
|
||||
Bshape = B.shape
|
||||
assert Ashape == Bshape
|
||||
def f(A,B):
|
||||
if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]:
|
||||
#FIXME: Careful if one is transpose of other, would make a matrix
|
||||
return A*B
|
||||
assert A.shape == B.shape
|
||||
def f(C,D):
|
||||
"""
|
||||
C is an element of A, D is the associated element of B
|
||||
"""
|
||||
Cshape = C.shape
|
||||
Dshape = D.shape
|
||||
if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
|
||||
or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
|
||||
print "Broadcasting, C: {} D:{}".format(C.shape, D.shape)
|
||||
return C*D
|
||||
else:
|
||||
return np.dot(A,B)
|
||||
print "Dotting, C: {} C:{}".format(C.shape, D.shape)
|
||||
return np.dot(C,D)
|
||||
dot = np.vectorize(f, otypes = [np.object])
|
||||
return dot(A,B)
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ try:
|
|||
from scipy import weave
|
||||
except ImportError:
|
||||
config.set('weave', 'working', 'False')
|
||||
|
||||
|
||||
|
||||
_scipyversion = np.float64((scipy.__version__).split('.')[:2])
|
||||
_fix_dpotri_scipy_bug = True
|
||||
|
|
@ -102,7 +102,6 @@ def jitchol(A, maxtries=5):
|
|||
num_tries = 1
|
||||
while num_tries <= maxtries and np.isfinite(jitter):
|
||||
try:
|
||||
print(jitter)
|
||||
L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
|
||||
return L
|
||||
except:
|
||||
|
|
@ -115,7 +114,6 @@ def jitchol(A, maxtries=5):
|
|||
except:
|
||||
logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
|
||||
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
|
||||
import ipdb;ipdb.set_trace()
|
||||
return L
|
||||
|
||||
# def dtrtri(L, lower=1):
|
||||
|
|
|
|||
|
|
@ -2,18 +2,37 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from scipy.special import cbrt
|
||||
from .config import *
|
||||
|
||||
_lim_val = np.finfo(np.float64).max
|
||||
|
||||
_lim_val_exp = np.log(_lim_val)
|
||||
_lim_val_square = np.sqrt(_lim_val)
|
||||
_lim_val_cube = np.power(_lim_val, -3)
|
||||
#_lim_val_cube = cbrt(_lim_val)
|
||||
_lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf)
|
||||
_lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf)
|
||||
_lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf)
|
||||
|
||||
def safe_exp(f):
|
||||
clip_f = np.clip(f, -np.inf, _lim_val_exp)
|
||||
return np.exp(clip_f)
|
||||
|
||||
def safe_square(f):
|
||||
f = np.clip(f, -np.inf, _lim_val_square)
|
||||
return f**2
|
||||
|
||||
def safe_cube(f):
|
||||
f = np.clip(f, -np.inf, _lim_val_cube)
|
||||
return f**3
|
||||
|
||||
def safe_quad(f):
|
||||
f = np.clip(f, -np.inf, _lim_val_quad)
|
||||
return f**4
|
||||
|
||||
def safe_three_times(f):
|
||||
f = np.clip(f, -np.inf, _lim_val_three_times)
|
||||
return 3*f
|
||||
|
||||
def chain_1(df_dg, dg_dx):
|
||||
"""
|
||||
Generic chaining function for first derivative
|
||||
|
|
@ -39,8 +58,8 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
|
|||
return d2f_dg2
|
||||
if len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1:
|
||||
raise NotImplementedError('Not implemented for matricies yet')
|
||||
#dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2
|
||||
dg_dx_2 = dg_dx**2
|
||||
dg_dx_2 = np.clip(dg_dx, -np.inf, _lim_val_square)**2
|
||||
#dg_dx_2 = dg_dx**2
|
||||
return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
|
||||
|
||||
def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
|
||||
|
|
@ -55,8 +74,8 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
|
|||
if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
|
||||
or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
|
||||
raise NotImplementedError('Not implemented for matricies yet')
|
||||
#dg_dx_3 = np.clip(dg_dx, 1e-12, _lim_val_cube)**3
|
||||
dg_dx_3 = dg_dx**3
|
||||
dg_dx_3 = np.clip(dg_dx, -np.inf, _lim_val_cube)**3
|
||||
#dg_dx_3 = dg_dx**3
|
||||
return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
|
||||
|
||||
def opt_wrapper(m, **kwargs):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue