Merge pull request #544 from SheffieldML/devel

Release GPy 1.8.x
Zhenwen Dai 2017-09-21 18:00:44 +01:00 committed by GitHub
commit d40db2b9af
47 changed files with 14431 additions and 273 deletions

CHANGELOG.md (11416 changed lines)

File diff suppressed because it is too large.

@@ -1 +1 @@
__version__ = "1.8.0"


@@ -109,17 +109,79 @@ class GP(Model):
self.link_parameter(self.likelihood)
self.posterior = None
def to_dict(self, save_data=True):
input_dict = super(GP, self)._to_dict()
input_dict["class"] = "GPy.core.GP"
if not save_data:
input_dict["X"] = None
input_dict["Y"] = None
else:
try:
input_dict["X"] = self.X.values.tolist()
except:
input_dict["X"] = self.X.tolist()
try:
input_dict["Y"] = self.Y.values.tolist()
except:
input_dict["Y"] = self.Y.tolist()
input_dict["kernel"] = self.kern.to_dict()
input_dict["likelihood"] = self.likelihood.to_dict()
if self.mean_function is not None:
input_dict["mean_function"] = self.mean_function.to_dict()
input_dict["inference_method"] = self.inference_method.to_dict()
#FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class
if self.Y_metadata is not None:
input_dict["Y_metadata"] = self.Y_metadata
if self.normalizer is not None:
input_dict["normalizer"] = self.normalizer.to_dict()
return input_dict
@staticmethod
def _from_dict(input_dict, data=None):
import GPy
import numpy as np
if (input_dict['X'] is None) or (input_dict['Y'] is None):
assert(data is not None)
input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
elif data is not None:
print("WARNING: The model has been saved with X,Y! The original values are being overriden!")
input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
else:
input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y'])
input_dict["kernel"] = GPy.kern.Kern.from_dict(input_dict["kernel"])
input_dict["likelihood"] = GPy.likelihoods.likelihood.Likelihood.from_dict(input_dict["likelihood"])
mean_function = input_dict.get("mean_function")
if mean_function is not None:
input_dict["mean_function"] = GPy.core.mapping.Mapping.from_dict(mean_function)
else:
input_dict["mean_function"] = mean_function
input_dict["inference_method"] = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(input_dict["inference_method"])
#FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class
Y_metadata = input_dict.get("Y_metadata")
input_dict["Y_metadata"] = Y_metadata
normalizer = input_dict.get("normalizer")
if normalizer is not None:
input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer)
else:
input_dict["normalizer"] = normalizer
return GP(**input_dict)
def save_model(self, output_filename, compress=True, save_data=True):
self._save_model(output_filename, compress=compress, save_data=save_data)
# The predictive variable to be used to predict using the posterior object's
# woodbury_vector and woodbury_inv is defined as predictive_variable
# as long as the posterior has the right woodbury entries.
# It is the input variable used for the covariance between
# X_star and the posterior of the GP.
# This is usually just a link to self.X (full GP) or self.Z (sparse GP).
# Make sure to name this variable and the predict functions will "just work"
# In maths the predictive variable is:
# K_{xx} - K_{xp}W_{pp}^{-1}K_{px}
# W_{pp} := \texttt{Woodbury inv}
# p := _predictive_variable
@property
def _predictive_variable(self):
@@ -305,9 +367,9 @@ class GP(Model):
m, v = self._raw_predict(X, full_cov=False, kern=kern)
if likelihood is None:
likelihood = self.likelihood
quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
if self.normalizer is not None:
quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
return quantiles
@@ -616,4 +678,3 @@ class GP(Model):
"""
mu_star, var_star = self._raw_predict(x_test)
return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
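A minimal usage sketch of the serialization API added above, on hypothetical synthetic data (it assumes the RBF kernel, the Gaussian likelihood and the exact inference object also gained to_dict in this release):

import numpy as np
import GPy

X = np.random.rand(20, 1)
Y = np.sin(6 * X) + 0.05 * np.random.randn(20, 1)
m = GPy.core.GP(X, Y,
                kernel=GPy.kern.RBF(1),
                likelihood=GPy.likelihoods.Gaussian(),
                inference_method=GPy.inference.latent_function_inference.ExactGaussianInference())

d = m.to_dict(save_data=False)              # X and Y are stored as None
m2 = GPy.core.GP.from_dict(d, data=(X, Y))  # so the data has to be supplied when rebuilding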


@@ -25,6 +25,30 @@ class Mapping(Parameterized):
def update_gradients(self, dL_dF, X):
raise NotImplementedError
def to_dict(self):
raise NotImplementedError
def _to_dict(self):
input_dict = {}
input_dict["input_dim"] = self.input_dim
input_dict["output_dim"] = self.output_dim
input_dict["name"] = self.name
return input_dict
@staticmethod
def from_dict(input_dict):
import copy
input_dict = copy.deepcopy(input_dict)
mapping_class = input_dict.pop('class')
input_dict["name"] = str(input_dict["name"])
import GPy
mapping_class = eval(mapping_class)
return mapping_class._from_dict(mapping_class, input_dict)
@staticmethod
def _from_dict(mapping_class, input_dict):
return mapping_class(**input_dict)
class Bijective_mapping(Mapping):
"""
@@ -37,5 +61,3 @@ class Bijective_mapping(Mapping):
def g(self, f):
"""Inverse mapping from output domain of the function to the inputs."""
raise NotImplementedError


@@ -8,6 +8,61 @@ class Model(ParamzModel, Priorizable):
def __init__(self, name):
super(Model, self).__init__(name) # Parameterized.__init__(self)
def _to_dict(self):
input_dict = {}
input_dict["name"] = self.name
return input_dict
def to_dict(self):
raise NotImplementedError
@staticmethod
def from_dict(input_dict, data=None):
import copy
input_dict = copy.deepcopy(input_dict)
model_class = input_dict.pop('class')
input_dict["name"] = str(input_dict["name"])
import GPy
model_class = eval(model_class)
return model_class._from_dict(input_dict, data)
@staticmethod
def _from_dict(model_class, input_dict, data=None):
return model_class(**input_dict)
def save_model(self, output_filename, compress=True, save_data=True):
raise NotImplementedError
def _save_model(self, output_filename, compress=True, save_data=True):
import json
output_dict = self.to_dict(save_data)
if compress:
import gzip
with gzip.GzipFile(output_filename + ".zip", 'w') as outfile:
json_str = json.dumps(output_dict)
json_bytes = json_str.encode('utf-8')
outfile.write(json_bytes)
else:
with open(output_filename + ".json", 'w') as outfile:
json.dump(output_dict, outfile)
@staticmethod
def load_model(output_filename, data=None):
compress = output_filename.split(".")[-1] == "zip"
import json
if compress:
import gzip
with gzip.GzipFile(output_filename, 'r') as json_data:
json_bytes = json_data.read()
json_str = json_bytes.decode('utf-8')
output_dict = json.loads(json_str)
else:
with open(output_filename) as json_data:
output_dict = json.load(json_data)
import GPy
return GPy.core.model.Model.from_dict(output_dict, data)
def log_likelihood(self):
raise NotImplementedError("this needs to be implemented to use the model class")


@@ -41,6 +41,26 @@ class LatentFunctionInference(object):
"""
pass
def _to_dict(self):
input_dict = {}
return input_dict
def to_dict(self):
raise NotImplementedError
@staticmethod
def from_dict(input_dict):
import copy
input_dict = copy.deepcopy(input_dict)
inference_class = input_dict.pop('class')
import GPy
inference_class = eval(inference_class)
return inference_class._from_dict(inference_class, input_dict)
@staticmethod
def _from_dict(inference_class, input_dict):
return inference_class(**input_dict)
class InferenceMethodList(LatentFunctionInference, list):
def on_optimization_start(self):


@@ -21,6 +21,11 @@ class ExactGaussianInference(LatentFunctionInference):
def __init__(self):
pass#self._YYTfactor_cache = caching.cache()
def to_dict(self):
input_dict = super(ExactGaussianInference, self)._to_dict()
input_dict["class"] = "GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference"
return input_dict
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, variance=None, Z_tilde=None):
"""
Returns a Posterior class containing essential quantities of the posterior


@@ -6,11 +6,141 @@ from paramz import ObsAr
from . import ExactGaussianInference, VarDTC
from ...util import diag
from .posterior import PosteriorEP as Posterior
from ...likelihoods import Gaussian
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
#Four wrapper classes to help modularisation of different EP versions
class marginalMoments(object):
def __init__(self, num_data):
self.Z_hat = np.empty(num_data,dtype=np.float64)
self.mu_hat = np.empty(num_data,dtype=np.float64)
self.sigma2_hat = np.empty(num_data,dtype=np.float64)
class cavityParams(object):
def __init__(self, num_data):
self.tau = np.empty(num_data,dtype=np.float64)
self.v = np.empty(num_data,dtype=np.float64)
def _update_i(self, eta, ga_approx, post_params, i):
self.tau[i] = 1./post_params.Sigma_diag[i] - eta*ga_approx.tau[i]
self.v[i] = post_params.mu[i]/post_params.Sigma_diag[i] - eta*ga_approx.v[i]
def to_dict(self):
return {"tau": self.tau.tolist(), "v": self.v.tolist()}
@staticmethod
def from_dict(input_dict):
c = cavityParams(len(input_dict["tau"]))
c.tau = np.array(input_dict["tau"])
c.v = np.array(input_dict["v"])
return c
class gaussianApproximation(object):
def __init__(self, v, tau):
self.tau = tau
self.v = v
def _update_i(self, eta, delta, post_params, marg_moments, i):
#Site parameters update
delta_tau = delta/eta*(1./marg_moments.sigma2_hat[i] - 1./post_params.Sigma_diag[i])
delta_v = delta/eta*(marg_moments.mu_hat[i]/marg_moments.sigma2_hat[i] - post_params.mu[i]/post_params.Sigma_diag[i])
tau_tilde_prev = self.tau[i]
self.tau[i] += delta_tau
# Enforce positivity of tau_tilde. Even though this is guaranteed for logconcave sites, it is still possible
# to get negative values due to numerical errors. Moreover, the value of tau_tilde should be positive in order to
# update the marginal likelihood without running into instability issues.
if self.tau[i] < np.finfo(float).eps:
self.tau[i] = np.finfo(float).eps
delta_tau = self.tau[i] - tau_tilde_prev
self.v[i] += delta_v
return (delta_tau, delta_v)
def to_dict(self):
return {"tau": self.tau.tolist(), "v": self.v.tolist()}
@staticmethod
def from_dict(input_dict):
return gaussianApproximation(np.array(input_dict["v"]), np.array(input_dict["tau"]))
class posteriorParamsBase(object):
def __init__(self, mu, Sigma_diag):
self.mu = mu
self.Sigma_diag = Sigma_diag
def _update_rank1(self, *arg):
pass
def _recompute(self, *arg):
pass
class posteriorParams(posteriorParamsBase):
def __init__(self, mu, Sigma, L=None):
self.Sigma = Sigma
self.L = L
Sigma_diag = np.diag(self.Sigma)
super(posteriorParams, self).__init__(mu, Sigma_diag)
def _update_rank1(self, delta_tau, ga_approx, i):
ci = delta_tau/(1.+ delta_tau*self.Sigma_diag[i])
DSYR(self.Sigma, self.Sigma[:,i].copy(), -ci)
self.mu = np.dot(self.Sigma, ga_approx.v)
def to_dict(self):
#TODO: Implement a more memory efficient variant
if self.L is None:
return { "mu": self.mu.tolist(), "Sigma": self.Sigma.tolist()}
else:
return { "mu": self.mu.tolist(), "Sigma": self.Sigma.tolist(), "L": self.L.tolist()}
@staticmethod
def from_dict(input_dict):
if "L" in input_dict:
return posteriorParams(np.array(input_dict["mu"]), np.array(input_dict["Sigma"]), np.array(input_dict["L"]))
else:
return posteriorParams(np.array(input_dict["mu"]), np.array(input_dict["Sigma"]))
@staticmethod
def _recompute(K, ga_approx):
num_data = len(ga_approx.tau)
tau_tilde_root = np.sqrt(ga_approx.tau)
Sroot_tilde_K = tau_tilde_root[:,None] * K
B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
L = jitchol(B)
V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
Sigma = K - np.dot(V.T,V) #K - K S^(1/2) B^(-1) S^(1/2) K = (K^(-1) + \Sigma^(-1))^(-1)
mu = np.dot(Sigma,ga_approx.v)
return posteriorParams(mu=mu, Sigma=Sigma, L=L)
class posteriorParamsDTC(posteriorParamsBase):
def __init__(self, mu, Sigma_diag):
super(posteriorParamsDTC, self).__init__(mu, Sigma_diag)
def _update_rank1(self, LLT, Kmn, delta_v, delta_tau, i):
#DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
self.Sigma_diag = np.maximum(np.sum(V*V,-2), np.finfo(float).eps) #diag(K_nm (L L^\top)^(-1)) K_mn
si = np.sum(V.T*V[:,i],-1) #(V V^\top)[:,i]
self.mu += (delta_v-delta_tau*self.mu[i])*si
#mu = np.dot(Sigma, v_tilde)
@staticmethod
def _recompute(LLT0, Kmn, ga_approx):
LLT = LLT0 + np.dot(Kmn*ga_approx.tau[None,:],Kmn.T)
L = jitchol(LLT)
V, _ = dtrtrs(L,Kmn,lower=1)
#Sigma_diag = np.sum(V*V,-2)
#Knmv_tilde = np.dot(Kmn,v_tilde)
#mu = np.dot(V2.T,Knmv_tilde)
Sigma = np.dot(V.T,V)
mu = np.dot(Sigma, ga_approx.v)
Sigma_diag = np.diag(Sigma).copy()
return posteriorParamsDTC(mu, Sigma_diag), LLT
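For reference, the updates these wrapper classes encapsulate are the standard EP equations (notation as in Rasmussen & Williams 2006, matching the code above):

cavity (cavityParams._update_i):         \tau_{-i} = \Sigma_{ii}^{-1} - \eta\tilde{\tau}_i ,   \nu_{-i} = \mu_i\Sigma_{ii}^{-1} - \eta\tilde{\nu}_i
site (gaussianApproximation._update_i):  \Delta\tilde{\tau}_i = (\delta/\eta)(\hat{\sigma}_i^{-2} - \Sigma_{ii}^{-1}) ,   \Delta\tilde{\nu}_i = (\delta/\eta)(\hat{\mu}_i\hat{\sigma}_i^{-2} - \mu_i\Sigma_{ii}^{-1})
posterior (posteriorParams._recompute):  B = I + \tilde{S}^{1/2} K \tilde{S}^{1/2} ,   \Sigma = K - K\tilde{S}^{1/2} B^{-1} \tilde{S}^{1/2} K ,   \mu = \Sigma\tilde{\nu}

where \tilde{S} = diag(\tilde{\tau}) and (\hat{\mu}_i, \hat{\sigma}_i^2) are the moments returned by likelihood.moments_match_ep.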
class EPBase(object):
def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False, max_iters=np.inf, ep_mode="alternated", parallel_updates=False, loading=False):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
@@ -26,16 +156,20 @@ class EPBase(object):
:max_iters: int
:ep_mode: string. It can be "nested" (EP is run every time the hyperparameters change) or "alternated" (EP is run at the beginning and the hyperparameters are then optimized).
:parallel_updates: boolean. If true, the site parameters are updated in parallel.
:loading: boolean. If True, prevents the EP parameters from changing. Hack used when loading a serialized model.
"""
super(EPBase, self).__init__()
self.always_reset = always_reset
self.epsilon, self.eta, self.delta, self.max_iters = epsilon, eta, delta, max_iters
self.ep_mode = ep_mode
self.parallel_updates = parallel_updates
#FIXME: Hack for serialization. If True, prevents the EP parameters from changing when loading a serialized model
self.loading = loading
self.reset()
def reset(self):
self.ga_approx_old = None
self._ep_approximation = None
def on_optimization_start(self):
@@ -45,6 +179,11 @@ class EPBase(object):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def _stop_criteria(self, ga_approx):
tau_diff = np.mean(np.square(ga_approx.tau-self.ga_approx_old.tau))
v_diff = np.mean(np.square(ga_approx.v-self.ga_approx_old.v))
return ((tau_diff < self.epsilon) and (v_diff < self.epsilon))
def __setstate__(self, state):
super(EPBase, self).__setstate__(state[0])
self.epsilon, self.eta, self.delta = state[1]
@@ -53,9 +192,21 @@
def __getstate__(self):
return [super(EPBase, self).__getstate__() , [self.epsilon, self.eta, self.delta]]
def _to_dict(self):
input_dict = super(EPBase, self)._to_dict()
input_dict["epsilon"]=self.epsilon
input_dict["eta"]=self.eta
input_dict["delta"]=self.delta
input_dict["always_reset"]=self.always_reset
input_dict["max_iters"]=self.max_iters
input_dict["ep_mode"]=self.ep_mode
input_dict["parallel_updates"]=self.parallel_updates
input_dict["loading"]=True
return input_dict
class EP(EPBase, ExactGaussianInference):
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
if self.always_reset and not self.loading:
self.reset()
num_data, output_dim = Y.shape
@@ -64,22 +215,22 @@ class EP(EPBase, ExactGaussianInference):
if K is None:
K = kern.K(X)
if self.ep_mode=="nested" and not self.loading:
#Force EP at each step of the optimization
self._ep_approximation = None
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
elif self.ep_mode=="alternated" or self.loading:
if getattr(self, '_ep_approximation', None) is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation
else:
raise ValueError("ep_mode value not valid")
self.loading = False
return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
@@ -90,41 +241,57 @@ class EP(EPBase, ExactGaussianInference):
# than ObsArrays
Y = Y.values.copy()
#Initial values - Marginal moments, cavity params, gaussian approximation params and posterior params
marg_moments = marginalMoments(num_data)
cav_params = cavityParams(num_data)
ga_approx, post_params = self._init_approximations(K, num_data)
#Approximation
stop = False
iterations = 0
while not stop and (iterations < self.max_iters):
self._local_updates(num_data, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata)
#(re) compute Sigma and mu using a full Cholesky decomposition
post_params = posteriorParams._recompute(K, ga_approx)
#monitor convergence
if iterations > 0:
stop = self._stop_criteria(ga_approx)
self.ga_approx_old = gaussianApproximation(ga_approx.v.copy(), ga_approx.tau.copy())
iterations += 1
# Z_tilde after removing the terms that can lead to infinite terms due to tau_tilde close to zero.
# These terms cancel with the corresponding terms in the marginal log-likelihood.
log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
# - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
return (post_params, ga_approx, cav_params, log_Z_tilde)
def _init_approximations(self, K, num_data):
#initial values - Gaussian factors
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
if self.ga_approx_old is None:
v_tilde, tau_tilde = np.zeros((2, num_data))
ga_approx = gaussianApproximation(v_tilde, tau_tilde)
Sigma = K.copy()
diag.add(Sigma, 1e-7)
mu = np.zeros(num_data)
post_params = posteriorParams(mu, Sigma)
else:
assert self.ga_approx_old.v.size == num_data, "data size mis-match: did you change the data? try resetting!"
ga_approx = gaussianApproximation(self.ga_approx_old.v, self.ga_approx_old.tau)
post_params = posteriorParams._recompute(K, ga_approx)
diag.add(post_params.Sigma, 1e-7)
# TODO: Check the log-marginal under both conditions and choose the best one
return (ga_approx, post_params)
def _local_updates(self, num_data, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata, update_order=None):
if update_order is None:
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
cav_params._update_i(self.eta, ga_approx, post_params, i)
if Y_metadata is not None:
# Pick out the relevant metadata for Yi
Y_metadata_i = {}
@@ -133,93 +300,77 @@
else:
Y_metadata_i = None
#Marginal moments
marg_moments.Z_hat[i], marg_moments.mu_hat[i], marg_moments.sigma2_hat[i] = likelihood.moments_match_ep(Y[i], cav_params.tau[i], cav_params.v[i], Y_metadata_i=Y_metadata_i)
#Site parameters update
delta_tau, delta_v = ga_approx._update_i(self.eta, self.delta, post_params, marg_moments, i)
if self.parallel_updates == False:
post_params._update_rank1(delta_tau, ga_approx, i)
def _log_Z_tilde(self, marg_moments, ga_approx, cav_params):
return np.sum((np.log(marg_moments.Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(1+ga_approx.tau/cav_params.tau) - 0.5 * ((ga_approx.v)**2 * 1./(cav_params.tau + ga_approx.tau))
+ 0.5*(cav_params.v * ( ( (ga_approx.tau/cav_params.tau) * cav_params.v - 2.0 * ga_approx.v ) * 1./(cav_params.tau + ga_approx.tau)))))
def _ep_marginal(self, K, ga_approx, Z_tilde):
post_params = posteriorParams._recompute(K, ga_approx)
# Gaussian log marginal excluding terms that can go to infinity due to arbitrarily small tau_tilde.
# These terms cancel out with the terms excluded from Z_tilde
B_logdet = np.sum(2.0*np.log(np.diag(post_params.L)))
log_marginal = 0.5*(-len(ga_approx.tau) * log_2_pi - B_logdet + np.sum(ga_approx.v * np.dot(post_params.Sigma,ga_approx.v)))
log_marginal += Z_tilde
return log_marginal, post_params
def _inference(self, Y, K, ga_approx, cav_params, likelihood, Z_tilde, Y_metadata=None):
log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)
tau_tilde_root = np.sqrt(ga_approx.tau)
Sroot_tilde_K = tau_tilde_root[:,None] * K
aux_alpha , _ = dpotrs(post_params.L, np.dot(Sroot_tilde_K, ga_approx.v), lower=1)
alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None] #(K + Sigma^(\tilde))^(-1) \mu^(\tilde)
LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1)
Wi = np.dot(LWi.T,LWi)
symmetrify(Wi) #(K + Sigma^(\tilde))^(-1)
dL_dK = 0.5 * (tdot(alpha) - Wi)
dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
def to_dict(self):
input_dict = super(EP, self)._to_dict()
input_dict["class"] = "GPy.inference.latent_function_inference.expectation_propagation.EP"
if self.ga_approx_old is not None:
input_dict["ga_approx_old"] = self.ga_approx_old.to_dict()
if self._ep_approximation is not None:
input_dict["_ep_approximation"] = {}
input_dict["_ep_approximation"]["post_params"] = self._ep_approximation[0].to_dict()
input_dict["_ep_approximation"]["ga_approx"] = self._ep_approximation[1].to_dict()
input_dict["_ep_approximation"]["cav_params"] = self._ep_approximation[2].to_dict()
input_dict["_ep_approximation"]["log_Z_tilde"] = self._ep_approximation[3].tolist()
return input_dict
@staticmethod
def _from_dict(inference_class, input_dict):
ga_approx_old = input_dict.pop('ga_approx_old', None)
if ga_approx_old is not None:
ga_approx_old = gaussianApproximation.from_dict(ga_approx_old)
_ep_approximation_dict = input_dict.pop('_ep_approximation', None)
_ep_approximation = None
if _ep_approximation_dict is not None:
_ep_approximation = []
_ep_approximation.append(posteriorParams.from_dict(_ep_approximation_dict["post_params"]))
_ep_approximation.append(gaussianApproximation.from_dict(_ep_approximation_dict["ga_approx"]))
_ep_approximation.append(cavityParams.from_dict(_ep_approximation_dict["cav_params"]))
_ep_approximation.append(np.array(_ep_approximation_dict["log_Z_tilde"]))
ee = EP(**input_dict)
ee.ga_approx_old = ga_approx_old
ee._ep_approximation = _ep_approximation
return ee
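A hedged sketch of how the EP serialization added here could be exercised (the Bernoulli classification setup and filename are illustrative; the module path matches the "class" string stored by EP.to_dict):

import numpy as np
import GPy

X = np.random.rand(30, 1)
Y = (np.sin(6 * X) > 0).astype(float)
ep = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="alternated")
m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1),
                likelihood=GPy.likelihoods.Bernoulli(),
                inference_method=ep)
m.save_model("gp_ep")   # EP.to_dict also stores ga_approx_old and the cached _ep_approximation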
class EPDTC(EPBase, VarDTC):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
@@ -244,24 +395,25 @@ class EPDTC(EPBase, VarDTC):
if self.ep_mode=="nested":
#Force EP at each step of the optimization
self._ep_approximation = None
post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
elif self.ep_mode=="alternated":
if getattr(self, '_ep_approximation', None) is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
post_params, ga_approx, log_Z_tilde = self._ep_approximation
else:
raise ValueError("ep_mode value not valid")
mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
return super(EPDTC, self).inference(kern, X, Z, likelihood, ObsAr(mu_tilde[:,None]),
mean_function=mean_function,
Y_metadata=Y_metadata,
precision=ga_approx.tau,
Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=log_Z_tilde)
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
@@ -272,119 +424,91 @@ class EPDTC(EPBase, VarDTC):
# than ObsArrays
Y = Y.values.copy()
#Initial values - Marginal moments, cavity params, gaussian approximation params and posterior params
marg_moments = marginalMoments(num_data)
cav_params = cavityParams(num_data)
ga_approx, post_params, LLT0, LLT = self._init_approximations(Kmm, Kmn, num_data)
#Approximation
stop = False
iterations = 0
while not stop and (iterations < self.max_iters):
self._local_updates(num_data, LLT0, LLT, Kmn, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata)
#(re) compute Sigma, Sigma_diag and mu using a full Cholesky decomposition
post_params, LLT = posteriorParamsDTC._recompute(LLT0, Kmn, ga_approx)
post_params.Sigma_diag = np.maximum(post_params.Sigma_diag, np.finfo(float).eps)
#monitor convergence
if iterations > 0:
stop = self._stop_criteria(ga_approx)
self.ga_approx_old = gaussianApproximation(ga_approx.v.copy(), ga_approx.tau.copy())
iterations += 1
log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
return post_params, ga_approx, log_Z_tilde
def _log_Z_tilde(self, marg_moments, ga_approx, cav_params):
mu_tilde = ga_approx.v/ga_approx.tau
mu_cav = cav_params.v/cav_params.tau
sigma2_sigma2tilde = 1./cav_params.tau + 1./ga_approx.tau
return np.sum((np.log(marg_moments.Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde)))
def _init_approximations(self, Kmm, Kmn, num_data):
#initial values - Gaussian factors
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
LLT0 = Kmm.copy()
Lm = jitchol(LLT0) #K_m = L_m L_m^\top
Vm,info = dtrtrs(Lm, Kmn,lower=1)
# Lmi = dtrtri(Lm)
# Kmmi = np.dot(Lmi.T,Lmi)
# KmmiKmn = np.dot(Kmmi,Kmn)
# Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
Qnn_diag = np.sum(Vm*Vm,-2) #diag(Knm Kmm^(-1) Kmn)
#diag.add(LLT0, 1e-8)
if self.ga_approx_old is None:
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
LLT = LLT0.copy() #Sigma = K.copy()
mu = np.zeros(num_data)
Sigma_diag = Qnn_diag.copy() + 1e-8
v_tilde, tau_tilde = np.zeros((2, num_data))
ga_approx = gaussianApproximation(v_tilde, tau_tilde)
post_params = posteriorParamsDTC(mu, Sigma_diag)
else:
assert self.ga_approx_old.v.size == num_data, "data size mis-match: did you change the data? try resetting!"
ga_approx = gaussianApproximation(self.ga_approx_old.v, self.ga_approx_old.tau)
post_params, LLT = posteriorParamsDTC._recompute(LLT0, Kmn, ga_approx)
post_params.Sigma_diag += 1e-8
# TODO: Check the log-marginal under both conditions and choose the best one
return (ga_approx, post_params, LLT0, LLT)
def _local_updates(self, num_data, LLT0, LLT, Kmn, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata, update_order=None):
if update_order is None:
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
cav_params._update_i(self.eta, ga_approx, post_params, i)
if Y_metadata is not None:
# Pick out the relevant metadata for Yi
Y_metadata_i = {}
for key in Y_metadata.keys():
Y_metadata_i[key] = Y_metadata[key][i, :]
else:
Y_metadata_i = None
#Marginal moments
marg_moments.Z_hat[i], marg_moments.mu_hat[i], marg_moments.sigma2_hat[i] = likelihood.moments_match_ep(Y[i], cav_params.tau[i], cav_params.v[i], Y_metadata_i=Y_metadata_i)
#Site parameters update
delta_tau, delta_v = ga_approx._update_i(self.eta, self.delta, post_params, marg_moments, i)
#Posterior distribution parameters update
if self.parallel_updates == False:
post_params._update_rank1(LLT, Kmn, delta_v, delta_tau, i)


@@ -13,9 +13,9 @@ class Add(CombinationKernel):
propagates gradients through.
This kernel will take over the active dims of its subkernels passed in.
NOTE: The subkernels will be copies of the original kernels, to prevent
unexpected behavior.
"""
def __init__(self, subkerns, name='sum'):
_newkerns = []
@@ -26,7 +26,7 @@ class Add(CombinationKernel):
_newkerns.append(part.copy())
else:
_newkerns.append(kern.copy())
super(Add, self).__init__(_newkerns, name)
self._exact_psicomp = self._check_exact_psicomp()
@@ -43,6 +43,11 @@ class Add(CombinationKernel):
else:
return False
def to_dict(self):
input_dict = super(Add, self)._to_dict()
input_dict["class"] = str("GPy.kern.Add")
return input_dict
@Cache_this(limit=3, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""


@@ -60,6 +60,35 @@ class Kern(Parameterized):
from .psi_comp import PSICOMP_GH
self.psicomp = PSICOMP_GH()
def _to_dict(self):
input_dict = {}
input_dict["input_dim"] = self.input_dim
if isinstance(self.active_dims, np.ndarray):
input_dict["active_dims"] = self.active_dims.tolist()
else:
input_dict["active_dims"] = self.active_dims
input_dict["name"] = self.name
input_dict["useGPU"] = self.useGPU
return input_dict
def to_dict(self):
raise NotImplementedError
@staticmethod
def from_dict(input_dict):
import copy
input_dict = copy.deepcopy(input_dict)
kernel_class = input_dict.pop('class')
input_dict["name"] = str(input_dict["name"])
import GPy
kernel_class = eval(kernel_class)
return kernel_class._from_dict(kernel_class, input_dict)
@staticmethod
def _from_dict(kernel_class, input_dict):
return kernel_class(**input_dict)
def __setstate__(self, state):
self._all_dims_active = np.arange(0, max(state['active_dims']) + 1)
super(Kern, self).__setstate__(state)
@@ -342,6 +371,21 @@ class CombinationKernel(Kern):
self.extra_dims = extra_dims
self.link_parameters(*kernels)
def _to_dict(self):
input_dict = super(CombinationKernel, self)._to_dict()
input_dict["parts"] = {}
for ii in range(len(self.parts)):
input_dict["parts"][ii] = self.parts[ii].to_dict()
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
parts = input_dict.pop('parts', None)
subkerns = []
for pp in parts:
subkerns.append(Kern.from_dict(parts[pp]))
return kernel_class(subkerns)
@property
def parts(self):
return self.parameters
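A minimal round-trip sketch for the kernel serialization added above (parameter values are illustrative; the sum kernel goes through Add.to_dict and CombinationKernel._to_dict):

import GPy

k = GPy.kern.RBF(1, variance=2.0, lengthscale=0.5) + GPy.kern.Bias(1)
d = k.to_dict()                    # {"class": "GPy.kern.Add", "parts": {...}, ...}
k2 = GPy.kern.Kern.from_dict(d)    # dispatches on the stored "class" string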


@@ -51,6 +51,18 @@ class Linear(Kern):
self.link_parameter(self.variances)
self.psicomp = PSICOMP_Linear()
def to_dict(self):
input_dict = super(Linear, self)._to_dict()
input_dict["class"] = "GPy.kern.Linear"
input_dict["variances"] = self.variances.values.tolist()
input_dict["ARD"] = self.ARD
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
useGPU = input_dict.pop('useGPU', None)
return Linear(**input_dict)
@Cache_this(limit=3)
def K(self, X, X2=None):
if self.ARD:
@@ -211,5 +223,3 @@ class LinearFull(Kern):
def gradients_X_diag(self, dL_dKdiag, X):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)


@@ -400,4 +400,3 @@ class PeriodicMatern52(Periodic):
self.variance.gradient = np.sum(dK_dvar*dL_dK)
self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
self.period.gradient = np.sum(dK_dper*dL_dK)


@@ -39,6 +39,11 @@ class Prod(CombinationKernel):
kernels.insert(i, part)
super(Prod, self).__init__(kernels, name)
def to_dict(self):
input_dict = super(Prod, self)._to_dict()
input_dict["class"] = str("GPy.kern.Prod")
return input_dict
@Cache_this(limit=3, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
if which_parts is None:
@@ -117,7 +122,7 @@ class Prod(CombinationKernel):
part_param_num = len(p.param_array) # number of parameters in the part
p.sde_update_gradient_full(gradients[part_start_param_index:(part_start_param_index+part_param_num)])
part_start_param_index += part_param_num
def sde(self):
"""
"""
@@ -131,88 +136,88 @@ class Prod(CombinationKernel):
dQc = None
dPinf = None
dP0 = None
# Assign models
for p in self.parts:
(Ft,Lt,Qct,Ht,P_inft, P0t, dFt,dQct,dP_inft,dP0t) = p.sde()
# check derivative dimensions ->
number_of_parameters = len(p.param_array)
assert dFt.shape[2] == number_of_parameters, "Dynamic matrix derivative shape is wrong"
assert dQct.shape[2] == number_of_parameters, "Diffusion matrix derivative shape is wrong"
assert dP_inft.shape[2] == number_of_parameters, "Infinite covariance matrix derivative shape is wrong"
# check derivative dimensions <-
# exception for periodic kernel
if (p.name == 'std_periodic'):
Qct = P_inft
dQct = dP_inft
dF = dkron(F,dF,Ft,dFt,'sum')
dQc = dkron(Qc,dQc,Qct,dQct,'prod')
dPinf = dkron(Pinf,dPinf,P_inft,dP_inft,'prod')
dP0 = dkron(P0,dP0,P0t,dP0t,'prod')
F = np.kron(F,np.eye(Ft.shape[0])) + np.kron(np.eye(F.shape[0]),Ft)
L = np.kron(L,Lt)
Qc = np.kron(Qc,Qct)
Pinf = np.kron(Pinf,P_inft)
P0 = np.kron(P0,P_inft)
H = np.kron(H,Ht)
return (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
def dkron(A,dA,B,dB, operation='prod'):
"""
Function computes the derivative of Kronecker product A*B
(or Kronecker sum A+B).
Input:
-----------------------
A: 2D matrix
Some matrix
dA: 3D (or 2D matrix)
Derivatives of A
B: 2D matrix
Some matrix
dB: 3D (or 2D matrix)
Derivatives of B
operation: str 'prod' or 'sum'
Which operation is considered. If the operation is 'sum' it is assumed
that A and B are square matrices.
Output:
dC: 3D matrix
Derivative of Kronecker product A*B (or Kronecker sum A+B)
"""
if dA is None:
dA_param_num = 0
dA = np.zeros((A.shape[0], A.shape[1],1))
else:
dA_param_num = dA.shape[2]
if dB is None:
dB_param_num = 0
dB = np.zeros((B.shape[0], B.shape[1],1))
else:
dB_param_num = dB.shape[2]
# Space allocation for derivative matrix
dC = np.zeros((A.shape[0]*B.shape[0], A.shape[1]*B.shape[1], dA_param_num + dB_param_num))
for k in range(dA_param_num):
if operation == 'prod':
dC[:,:,k] = np.kron(dA[:,:,k],B)
else:
dC[:,:,k] = np.kron(dA[:,:,k],np.eye( B.shape[0] ))
for k in range(dB_param_num):
if operation == 'prod':
dC[:,:,dA_param_num+k] = np.kron(A,dB[:,:,k])
else:
dC[:,:,dA_param_num+k] = np.kron(np.eye( A.shape[0] ),dB[:,:,k])
return dC


@@ -31,6 +31,14 @@ class RBF(Stationary):
self.inv_l = Param('inv_lengthscale',1./self.lengthscale**2, Logexp())
self.link_parameter(self.inv_l)
def to_dict(self):
input_dict = super(RBF, self)._to_dict()
input_dict["class"] = "GPy.kern.RBF"
input_dict["inv_l"] = self.use_invLengthscale
if input_dict["inv_l"] == True:
input_dict["lengthscale"] = np.sqrt(1 / float(self.inv_l))
return input_dict
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2)
@@ -42,7 +50,7 @@ class RBF(Stationary):
def dK2_drdr_diag(self):
return -self.variance # as the diagonal of r is always filled with zeros
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:


@@ -93,6 +93,17 @@ class StdPeriodic(Kern):
self.link_parameters(self.variance, self.period, self.lengthscale)
def to_dict(self):
input_dict = super(StdPeriodic, self)._to_dict()
input_dict["class"] = "GPy.kern.StdPeriodic"
input_dict["variance"] = self.variance.values.tolist()
input_dict["period"] = self.period.values.tolist()
input_dict["lengthscale"] = self.lengthscale.values.tolist()
input_dict["ARD1"] = self.ARD1
input_dict["ARD2"] = self.ARD2
return input_dict
def parameters_changed(self):
"""
This function acts as a callback for each optimization iteration.


@@ -14,6 +14,11 @@ class Static(Kern):
self.variance = Param('variance', variance, Logexp())
self.link_parameters(self.variance)
def _to_dict(self):
input_dict = super(Static, self)._to_dict()
input_dict["variance"] = self.variance.values.tolist()
return input_dict
def Kdiag(self, X):
ret = np.empty((X.shape[0],), dtype=np.float64)
ret[:] = self.variance
@@ -133,6 +138,16 @@ class Bias(Static):
def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
super(Bias, self).__init__(input_dim, variance, active_dims, name)
def to_dict(self):
input_dict = super(Bias, self)._to_dict()
input_dict["class"] = "GPy.kern.Bias"
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
useGPU = input_dict.pop('useGPU', None)
return Bias(**input_dict)
def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
return np.full(shape, self.variance, dtype=np.float64)
@@ -250,4 +265,3 @@ class Precomputed(Fixed):
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None))


@@ -79,6 +79,13 @@ class Stationary(Kern):
assert self.variance.size==1
self.link_parameters(self.variance, self.lengthscale)
def _to_dict(self):
input_dict = super(Stationary, self)._to_dict()
input_dict["variance"] = self.variance.values.tolist()
input_dict["lengthscale"] = self.lengthscale.values.tolist()
input_dict["ARD"] = self.ARD
return input_dict
def K_of_r(self, r):
raise NotImplementedError("implement the covariance function as a fn of r to use this class")
@@ -351,6 +358,16 @@ class Exponential(Stationary):
def dK_dr(self, r):
return -self.K_of_r(r)
def to_dict(self):
input_dict = super(Exponential, self)._to_dict()
input_dict["class"] = "GPy.kern.Exponential"
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
useGPU = input_dict.pop('useGPU', None)
return Exponential(**input_dict)
# def sde(self):
# """
# Return the state space representation of the covariance.
@@ -399,6 +416,16 @@ class Matern32(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'):
super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def to_dict(self):
input_dict = super(Matern32, self)._to_dict()
input_dict["class"] = "GPy.kern.Matern32"
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
useGPU = input_dict.pop('useGPU', None)
return Matern32(**input_dict)
def K_of_r(self, r):
return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
@ -478,6 +505,16 @@ class Matern52(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def to_dict(self):
input_dict = super(Matern52, self)._to_dict()
input_dict["class"] = "GPy.kern.Matern52"
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
useGPU = input_dict.pop('useGPU', None)
return Matern52(**input_dict)
def K_of_r(self, r):
return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
@ -533,6 +570,16 @@ class ExpQuad(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def to_dict(self):
input_dict = super(ExpQuad, self)._to_dict()
input_dict["class"] = "GPy.kern.ExpQuad"
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
useGPU = input_dict.pop('useGPU', None)
return ExpQuad(**input_dict)
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2)
@ -566,6 +613,17 @@ class RatQuad(Stationary):
self.power = Param('power', power, Logexp())
self.link_parameters(self.power)
def to_dict(self):
input_dict = super(RatQuad, self)._to_dict()
input_dict["class"] = "GPy.kern.RatQuad"
input_dict["power"] = self.power.values.tolist()
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
useGPU = input_dict.pop('useGPU', None)
return RatQuad(**input_dict)
def K_of_r(self, r):
r2 = np.square(r)
# return self.variance*np.power(1. + r2/2., -self.power)
@ -588,5 +646,3 @@ class RatQuad(Stationary):
def update_gradients_diag(self, dL_dKdiag, X):
super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
self.power.gradient = 0.
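Taken together, the new to_dict/_from_dict hooks let a kernel round-trip through a plain, JSON-friendly dictionary. A minimal sketch of the intended usage (values illustrative, assuming GPy 1.8 with these hooks in place):

import numpy as np
import GPy

# Serialise a kernel to a plain dict and rebuild it from that dict.
k = GPy.kern.Matern32(2, variance=0.5, lengthscale=[1.0, 2.0], ARD=True)
d = k.to_dict()                      # carries the class name, variance, lengthscale and ARD flag
k2 = GPy.kern.Kern.from_dict(d)      # dispatches to Matern32._from_dict

X = np.random.randn(5, 2)
assert np.allclose(k.K(X), k2.K(X))  # identical covariances before and after the round trip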


@ -7,4 +7,5 @@ from .student_t import StudentT
from .likelihood import Likelihood
from .mixed_noise import MixedNoise
from .binomial import Binomial
from .weibull import Weibull
from .loglogistic import LogLogistic


@ -29,6 +29,11 @@ class Bernoulli(Likelihood):
if isinstance(gp_link , (link_functions.Heaviside, link_functions.Probit)):
self.log_concave = True
def to_dict(self):
input_dict = super(Bernoulli, self)._to_dict()
input_dict["class"] = "GPy.likelihoods.Bernoulli"
return input_dict
def _preprocess_values(self, Y):
"""
Check if the values of the observations correspond to the values


@ -66,7 +66,14 @@ class Binomial(Likelihood):
np.testing.assert_array_equal(N.shape, y.shape)
nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f)
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
t1[y>0] = y[y>0]*np.log(inv_link_f[y>0])
t2[Ny>0] = Ny[Ny>0]*np.log(1.-inv_link_f[Ny>0])
return nchoosey + t1 + t2
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
"""
@ -86,7 +93,13 @@ class Binomial(Likelihood):
N = Y_metadata['trials']
np.testing.assert_array_equal(N.shape, y.shape)
return y/inv_link_f - (N-y)/(1.-inv_link_f)
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
t1[y>0] = y[y>0]/inv_link_f[y>0]
t2[Ny>0] = (Ny[Ny>0])/(1.-inv_link_f[Ny>0])
return t1 - t2
def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
"""
@ -111,7 +124,13 @@ class Binomial(Likelihood):
"""
N = Y_metadata['trials']
np.testing.assert_array_equal(N.shape, y.shape)
return -y/np.square(inv_link_f) - (N-y)/np.square(1.-inv_link_f)
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
t1[y>0] = -y[y>0]/np.square(inv_link_f[y>0])
t2[Ny>0] = -(Ny[Ny>0])/np.square(1.-inv_link_f[Ny>0])
return t1+t2
def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
"""
@ -135,8 +154,14 @@ class Binomial(Likelihood):
N = Y_metadata['trials']
np.testing.assert_array_equal(N.shape, y.shape)
inv_link_f2 = np.square(inv_link_f)
#inv_link_f2 = np.square(inv_link_f) #TODO Remove. Why is this here?
return 2*y/inv_link_f**3 - 2*(N-y)/(1.-inv_link_f)**3
Ny = N-y
t1 = np.zeros(y.shape)
t2 = np.zeros(y.shape)
t1[y>0] = 2*y[y>0]/inv_link_f[y>0]**3
t2[Ny>0] = - 2*(Ny[Ny>0])/(1.-inv_link_f[Ny>0])**3
return t1 + t2
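The masking in these hunks exists because the naive expressions evaluate 0*log(0) (NaN in NumPy) whenever y == 0 or N - y == 0 coincides with a saturated inverse link (exactly 0 or 1), even though the correct contribution of those terms is zero. A small illustrative sketch of the idea (hypothetical values):

import numpy as np

y = np.array([0., 3., 5.])
N = np.array([5., 5., 5.])
p = np.array([0.0, 0.6, 1.0])                      # inverse-link values, deliberately including 0 and 1

naive = y*np.log(p) + (N - y)*np.log(1. - p)       # first and last entries become nan

Ny = N - y
t1 = np.zeros_like(y)
t2 = np.zeros_like(y)
t1[y > 0] = y[y > 0]*np.log(p[y > 0])
t2[Ny > 0] = Ny[Ny > 0]*np.log(1. - p[Ny > 0])
masked = t1 + t2                                   # finite everywhere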
def samples(self, gp, Y_metadata=None, **kw):
"""


@ -46,6 +46,13 @@ class Gaussian(Likelihood):
if isinstance(gp_link, link_functions.Identity):
self.log_concave = True
def to_dict(self):
input_dict = super(Gaussian, self)._to_dict()
input_dict["class"] = "GPy.likelihoods.Gaussian"
input_dict["variance"] = self.variance.values.tolist()
return input_dict
def betaY(self,Y,Y_metadata=None):
#TODO: ~Ricardo this does not live here
raise RuntimeError("Please notify the GPy developers, this should not happen")
@ -57,7 +64,10 @@ class Gaussian(Likelihood):
def update_gradients(self, grad):
self.variance.gradient = grad
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
return self.exact_inference_gradients(dL_dKdiag)
def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None):
return dL_dKdiag.sum()
def _preprocess_values(self, Y):


@ -6,8 +6,12 @@ from scipy import stats,special
import scipy as sp
from . import link_functions
from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
from ..util.quad_integrate import quadgk_int
from scipy.integrate import quad
from functools import partial
import warnings
from ..core.parameterization import Parameterized
class Likelihood(Parameterized):
@ -40,6 +44,37 @@ class Likelihood(Parameterized):
self.gp_link = gp_link
self.log_concave = False
self.not_block_really = False
self.name = name
def to_dict(self):
raise NotImplementedError
def _to_dict(self):
input_dict = {}
input_dict["name"] = self.name
input_dict["gp_link_dict"] = self.gp_link.to_dict()
return input_dict
@staticmethod
def from_dict(input_dict):
import copy
input_dict = copy.deepcopy(input_dict)
likelihood_class = input_dict.pop('class')
input_dict["name"] = str(input_dict["name"])
name = input_dict.pop('name')
import GPy
likelihood_class = eval(likelihood_class)
return likelihood_class._from_dict(likelihood_class, input_dict)
@staticmethod
def _from_dict(likelihood_class, input_dict):
import copy
input_dict = copy.deepcopy(input_dict)
gp_link_dict = input_dict.pop('gp_link_dict')
import GPy
gp_link = GPy.likelihoods.link_functions.GPTransformation.from_dict(gp_link_dict)
input_dict["gp_link"] = gp_link
return likelihood_class(**input_dict)
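As with the kernels, these hooks are meant to make likelihoods reconstructible from plain dictionaries. A minimal sketch (illustrative, assuming GPy 1.8 with these methods in place):

import GPy

lik = GPy.likelihoods.Gaussian(variance=0.1)
d = lik.to_dict()                 # contains the class path, the variance as a list, the name and the link-function dict
lik2 = GPy.likelihoods.likelihood.Likelihood.from_dict(d)
assert float(lik2.variance) == float(lik.variance)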
def request_num_latent_functions(self, Y):
"""
@ -223,6 +258,91 @@ class Likelihood(Parameterized):
self.__gh_points = np.polynomial.hermite.hermgauss(T)
return self.__gh_points
def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
if self.size > 0:
shape = Y.shape
tau,v,Y = cav_tau.flatten(), cav_v.flatten(),Y.flatten()
mu = v/tau
sigma2 = 1./tau
# assert Y.shape == v.shape
dlik_dtheta = np.empty((self.size, Y.shape[0]))
# for j in range(self.size):
Y_metadata_list = []
for index in range(len(Y)):
Y_metadata_i = {}
if Y_metadata is not None:
for key in Y_metadata.keys():
Y_metadata_i[key] = Y_metadata[key][index,:]
Y_metadata_list.append(Y_metadata_i)
if quad_mode == 'gk':
f = partial(self.integrate_gk)
quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten()), Y_metadata_list))
quads = np.vstack(quads)
quads.reshape(self.size, shape[0], shape[1])
elif quad_mode == 'gh':
f = partial(self.integrate_gh)
quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
quads = np.hstack(quads)
quads = quads.T
else:
raise Exception("no other quadrature mode available")
# do a gaussian-hermite integration
dL_dtheta_avg = boost_grad * np.nanmean(quads, axis=1)
dL_dtheta = boost_grad * np.nansum(quads, axis=1)
# dL_dtheta = boost_grad * np.nansum(dlik_dtheta, axis=1)
else:
dL_dtheta = np.zeros(self.num_params)
return dL_dtheta
def integrate_gk(self, Y, mu, sigma, Y_metadata_i=None):
# gaussian-kronrod integration.
fmin = -np.inf
fmax = np.inf
SQRT_2PI = np.sqrt(2.*np.pi)
def generate_integral(f):
a = np.exp(self.logpdf_link(f, Y, Y_metadata_i)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (
SQRT_2PI * sigma)
fn1 = a * self.dlogpdf_dtheta(f, Y, Y_metadata_i)
fn = fn1
return fn
dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin, fmax=fmax)
return dF_dtheta_i
def integrate_gh(self, Y, mu, sigma, Y_metadata_i=None, gh_points=None):
# gaussian-hermite quadrature.
# "calculate site derivatives E_f{d logp(y_i|f_i)/da} where a is a likelihood parameter
# and the expectation is over the exact marginal posterior, which is not gaussian- and is
# unnormalised product of the cavity distribution(a Gaussian) and the exact likelihood term.
#
# calculate the expectation wrt the approximate marginal posterior, which should be approximately the same.
# . This term is needed for evaluating the
# gradients of the marginal likelihood estimate Z_EP wrt likelihood parameters."
# "writing it explicitly "
# use them for gaussian-hermite quadrature
SQRT_2PI = np.sqrt(2.*np.pi)
if gh_points is None:
gh_x, gh_w = self._gh_points(32)
else:
gh_x, gh_w = gh_points
X = gh_x[None,:]*np.sqrt(2.)*sigma + mu
# Here X is a grid vector of possible fi values, while Y is just a single value which will be broadcasted.
a = np.exp(self.logpdf_link(X, Y, Y_metadata_i))
a = a.repeat(self.num_params,0)
b = self.dlogpdf_dtheta(X, Y, Y_metadata_i)
old_shape = b.shape
fn = np.array([i*j for i,j in zip(a.flatten(), b.flatten())])
fn = fn.reshape(old_shape)
dF_dtheta_i = np.dot(fn, gh_w)/np.sqrt(np.pi)
return dF_dtheta_i
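integrate_gh relies on the standard Gauss-Hermite change of variables, E_{f~N(mu, sigma^2)}[g(f)] ≈ (1/sqrt(pi)) * sum_i w_i g(sqrt(2)*sigma*x_i + mu). A quick sanity check of that identity on its own (illustrative, not part of the diff):

import numpy as np

# E[f^2] under N(mu, sigma^2) is mu^2 + sigma^2; the quadrature rule should reproduce it.
mu, sigma = 0.3, 0.7
gh_x, gh_w = np.polynomial.hermite.hermgauss(32)
f = np.sqrt(2.)*sigma*gh_x + mu
approx = np.dot(f**2, gh_w)/np.sqrt(np.pi)
assert np.isclose(approx, mu**2 + sigma**2)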
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
"""
Use Gauss-Hermite Quadrature to compute


@ -43,6 +43,25 @@ class GPTransformation(object):
""" """
raise NotImplementedError raise NotImplementedError
def to_dict(self):
raise NotImplementedError
def _to_dict(self):
return {}
@staticmethod
def from_dict(input_dict):
import copy
input_dict = copy.deepcopy(input_dict)
link_class = input_dict.pop('class')
import GPy
link_class = eval(link_class)
return link_class._from_dict(link_class, input_dict)
@staticmethod
def _from_dict(link_class, input_dict):
return link_class(**input_dict)
class Identity(GPTransformation):
"""
.. math::
@ -62,6 +81,10 @@ class Identity(GPTransformation):
def d3transf_df3(self,f):
return np.zeros_like(f)
def to_dict(self):
input_dict = super(Identity, self)._to_dict()
input_dict["class"] = "GPy.likelihoods.link_functions.Identity"
return input_dict
class Probit(GPTransformation):
"""
@ -82,6 +105,11 @@ class Probit(GPTransformation):
def d3transf_df3(self,f):
return (safe_square(f)-1.)*std_norm_pdf(f)
def to_dict(self):
input_dict = super(Probit, self)._to_dict()
input_dict["class"] = "GPy.likelihoods.link_functions.Probit"
return input_dict
class Cloglog(GPTransformation):
"""


@ -0,0 +1,304 @@
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats, special
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from . import link_functions
from .likelihood import Likelihood
class LogGaussian(Likelihood):
"""
.. math::
$$ p(y_{i}|f_{i}, z_{i}) = \\prod_{i=1}^{n} \\left(\\frac{1}{y_{i}\\sqrt{2\\pi\\sigma^{2}}}\\exp\\left(-\\frac{(\\log y_{i} - f_{i})^{2}}{2\\sigma^{2}}\\right)\\right)^{1-z_{i}} \\left(1 - \\Phi\\left(\\frac{\\log y_{i} - f_{i}}{\\sigma}\\right)\\right)^{z_{i}} $$
.. note:
where z_{i} is the censoring indicator: 0 for non-censored data, and 1 for censored data.
"""
def __init__(self,gp_link=None, sigma=1.):
if gp_link is None:
gp_link = link_functions.Identity()
# gp_link = link_functions.Log()
super(LogGaussian, self).__init__(gp_link, name='loggaussian')
self.sigma = Param('sigma', sigma, Logexp())
self.variance = Param('variance', sigma**2, Logexp())
self.link_parameter(self.variance)
# self.link_parameter()
def pdf_link(self, link_f, y, Y_metadata=None):
"""
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: likelihood evaluated for this point
:rtype: float
"""
return np.exp(self.logpdf_link(link_f, y, Y_metadata=Y_metadata))
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
uncensored = (1-c)* (-0.5*np.log(2*np.pi*self.variance) - np.log(y) - (np.log(y)-link_f)**2 /(2*self.variance) )
censored = c*np.log( 1 - stats.norm.cdf((np.log(y) - link_f)/np.sqrt(self.variance)) )
logpdf = uncensored + censored
return logpdf
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
derivative of logpdf wrt link_f param
.. math::
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: gradient of the log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
val = np.log(y) - link_f
val_scaled = val/np.sqrt(self.variance)
val_scaled2 = val/self.variance
uncensored = (1-c)*(val_scaled2)
a = (1- stats.norm.cdf(val_scaled))
# llg(z) = 1. / (1 - norm_cdf(r / sqrt(s2))). * (1 / sqrt(2 * pi * s2). * exp(-1 / (2. * s2). * r. ^ 2));
censored = c*( 1./a) * (np.exp(-1.* val**2 /(2*self.variance)) / np.sqrt(2*np.pi*self.variance))
# censored = c * (1. / (1 - stats.norm.cdf(val_scaled))) * (stats.norm.pdf(val_scaled))
gradient = uncensored + censored
return gradient
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
# c = Y_metadata['censored']
# c = np.zeros((y.shape[0],))
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
val = np.log(y) - link_f
val_scaled = val/np.sqrt(self.variance)
val_scaled2 = val/self.variance
a = (1 - stats.norm.cdf(val_scaled))
uncensored = (1-c) *(-1)/self.variance
censored = c*(-np.exp(-val**2/self.variance) / ( 2*np.pi*self.variance*(a**2) ) +
val*np.exp(-(val**2)/(2*self.variance))/( np.sqrt(2*np.pi)*self.variance**(3/2.)*a) )
hessian = censored + uncensored
return hessian
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative of the log-likelihood function at y given link(f), w.r.t link(f)
.. math::
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: third derivative of the log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
val = np.log(y) - link_f
val_scaled = val/np.sqrt(self.variance)
val_scaled2 = val/self.variance
a = (1 - stats.norm.cdf(val_scaled))
uncensored = 0
censored = c *( 2*np.exp(-3*(val**2)/(2*self.variance)) / ((a**3)*(2*np.pi*self.variance)**(3/2.))
- val*np.exp(-(val**2)/self.variance)/ ( (a**2)*np.pi*self.variance**2)
- val*np.exp(-(val**2)/self.variance)/ ( (a**2)*2*np.pi*self.variance**2)
- np.exp(-(val**2)/(2*self.variance))/ ( a*(self.variance**(1.50))*np.sqrt(2*np.pi))
+ (val**2)*np.exp(-(val**2)/(2*self.variance))/ ( a*np.sqrt(2*np.pi*self.variance)*self.variance**2 ) )
d3pdf_dlink3 = uncensored + censored
return d3pdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
"""
Gradient of the log-likelihood function at y given f, w.r.t variance parameter
.. math::
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
val = np.log(y) - link_f
val_scaled = val/np.sqrt(self.variance)
val_scaled2 = val/self.variance
a = (1 - stats.norm.cdf(val_scaled))
uncensored = (1-c)*(-0.5/self.variance + (val**2)/(2*(self.variance**2)) )
censored = c *( val*np.exp(-val**2/ (2*self.variance)) / (a*np.sqrt(2*np.pi)*2*(self.variance**(1.5))) )
dlogpdf_dvar = uncensored + censored
# dlogpdf_dvar = dlogpdf_dvar*self.variance
return dlogpdf_dvar
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
"""
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
val = np.log(y) - link_f
val_scaled = val/np.sqrt(self.variance)
val_scaled2 = val/self.variance
a = (1 - stats.norm.cdf(val_scaled))
uncensored = (1-c)*(-val/(self.variance**2))
censored = c * (-val*np.exp(-val**2/self.variance)/( 4*np.pi*(self.variance**2)*(a**2)) +
(-1 + (val**2)/self.variance)*np.exp(-val**2/(2*self.variance) ) /
( a*(np.sqrt(2.*np.pi)*2*self.variance**1.5)) )
dlik_grad_dsigma = uncensored + censored
# dlik_grad_dsigma = dlik_grad_dsigma*self.variance
return dlik_grad_dsigma
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
"""
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
val = np.log(y) - link_f
val_scaled = val/np.sqrt(self.variance)
val_scaled2 = val/self.variance
a = (1 - stats.norm.cdf(val_scaled))
uncensored = (1-c)*( 1./(self.variance**2) )
censored = c*( val*np.exp(-3*(val**2)/(2*self.variance) )/ ((a**3)*np.sqrt(8*np.pi**3)*self.variance**(5/2.))
+ np.exp(-val**2/self.variance)/((a**2)*4*np.pi*self.variance**2)
- np.exp(-val**2/self.variance)*val**2 / ((a**2)*2*np.pi*self.variance**3)
+ np.exp(-val**2/self.variance)/ ( (a**2)*4*np.pi*self.variance**2)
- np.exp(-val**2/ (2*self.variance))*val / ( a*np.sqrt(2*np.pi)*2*self.variance**(5/2.))
- np.exp(-val**2/self.variance)*(val**2) / ((a**2)*4*np.pi*self.variance**3)
- np.exp(-val**2/ (2*self.variance))*val/ (a*np.sqrt(2*np.pi)*self.variance**(5/2.))
+ np.exp(-val**2/ (2*self.variance))*(val**3) / (a*np.sqrt(2*np.pi)*2*self.variance**(7/2.)) )
dlik_hess_dsigma = uncensored + censored
return dlik_hess_dsigma
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
"""
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f,y,Y_metadata=Y_metadata)
return dlogpdf_dtheta
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
"""
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dlink_dtheta[0,:,:] = self.dlogpdf_dlink_dvar(f,y,Y_metadata=Y_metadata)
return dlogpdf_dlink_dtheta
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
"""
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
d2logpdf_dlink2_dtheta[0,:,:] = self.d2logpdf_dlink2_dvar(f,y,Y_metadata=Y_metadata)
return d2logpdf_dlink2_dtheta
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
self.variance.gradient = grads[0]
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()


@ -0,0 +1,339 @@
from __future__ import division
# Copyright (c) 2015 Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from . import link_functions
from .likelihood import Likelihood
from .link_functions import Log
class LogLogistic(Likelihood):
"""
.. math::
$$ p(y_{i}|f_{i}, z_{i}) = \\prod_{i=1}^{n} (\\frac{ry^{r-1}}{\\exp{f(x_{i})}})^{1-z_i} (1 + (\\frac{y}{\\exp(f(x_{i}))})^{r})^{z_i-2} $$
.. note:
where z_{i} is the censoring indicator- 0 for non-censored data, and 1 for censored data.
"""
def __init__(self, gp_link=None, r=1.0):
if gp_link is None:
#Parameterised not as link_f but as f
gp_link = Log()
super(LogLogistic, self).__init__(gp_link, name='LogLogistic')
self.r = Param('r_log_shape', float(r), Logexp())
self.link_parameter(self.r)
# self.censored = 'censored'
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
.. math::
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: likelihood evaluated for this point
:rtype: float
"""
return np.exp(self.logpdf_link(link_f, y, Y_metadata=Y_metadata))
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
.. math::
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: likelihood evaluated for this point
:rtype: float
"""
# c = np.zeros((y.shape[0],))
c = np.zeros_like(link_f)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
link_f = np.clip(link_f, 1e-150, 1e100)
# y_link_f = y/link_f
# y_link_f_r = y_link_f**self.r
# y_link_f_r = np.clip(y**self.r, 1e-150, 1e200) / np.clip(link_f**self.r, 1e-150, 1e200)
# y_link_f_r = np.clip((y/link_f)**self.r, 1e-150, 1e200)
y_r = np.clip(y**self.r, 1e-150, 1e200)
link_f_r = np.clip(link_f**self.r, 1e-150, 1e200)
y_link_f_r = np.clip(y_r / link_f_r, 1e-150, 1e200)
#uncensored = (1-c)*(np.log(self.r) + (self.r+1)*np.log(y) - self.r*np.log(link_f) - 2*np.log1p(y_link_f_r))
#uncensored = (1-c)*(np.log((self.r/link_f)*y_link_f**(self.r-1)) - 2*np.log1p(y_link_f_r))
# clever way tp break it into censored and uncensored-parts ..
uncensored = (1-c)*(np.log(self.r) + (self.r-1)*np.log(y) - self.r*np.log(link_f) - 2*np.log1p(y_link_f_r))
censored = (c)*(-np.log1p(y_link_f_r))
#
return uncensored + censored
# return uncensored
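For the censoring-aware likelihoods added in this release, the indicator is passed through Y_metadata['censored']. A hypothetical usage sketch (values illustrative):

import numpy as np
import GPy

y = np.array([[1.2], [0.7], [2.5], [3.1]])
Y_metadata = {'censored': np.array([[0.], [0.], [1.], [1.]])}   # last two rows are right-censored
link_f = np.full_like(y, 1.5)

lik = GPy.likelihoods.LogLogistic(r=2.)
print(lik.logpdf_link(link_f, y, Y_metadata=Y_metadata))        # censored rows contribute only the survival term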
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
# c = Y_metadata['censored']
# for debugging
# c = np.zeros((y.shape[0],))
c = np.zeros_like(link_f)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
#y_link_f = y/link_f
#y_link_f_r = y_link_f**self.r
y_link_f_r = np.clip(y**self.r, 1e-150, 1e200) / np.clip(link_f**self.r, 1e-150, 1e200)
#In terms of link_f
# uncensored = (1-c)*( (2*self.r*y**r)/(link_f**self.r + y**self.r) - link_f*self.r)
uncensored = (1-c)*self.r*(y_link_f_r - 1)/(link_f*(1 + y_link_f_r))
censored = c*(self.r*y_link_f_r/(link_f*y_link_f_r + link_f))
return uncensored + censored
# return uncensored
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
# c = Y_metadata['censored']
# c = np.zeros((y.shape[0],))
c = np.zeros_like(link_f)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
y_link_f = y/link_f
y_link_f_r = y_link_f**self.r
#In terms of link_f
censored = c*(-self.r*y_link_f_r*(y_link_f_r + self.r + 1)/((link_f**2)*(y_link_f_r + 1)**2))
uncensored = (1-c)*(-self.r*(2*self.r*y_link_f_r + y_link_f**(2*self.r) - 1) / ((link_f**2)*(1+ y_link_f_r)**2))
hess = censored + uncensored
return hess
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
# c = Y_metadata['censored']
# for debugging
# c = np.zeros((y.shape[0],))
c = np.zeros_like(link_f)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
y_link_f = y/link_f
y_link_f_r = y_link_f**self.r
#In terms of link_f
censored = c*(self.r*y_link_f_r*(((self.r**2)*(-(y_link_f_r - 1))) + 3*self.r*(y_link_f_r + 1) + 2*(y_link_f_r + 1)**2)
/ ((link_f**3)*(y_link_f_r + 1)**3))
uncensored = (1-c)*(2*self.r*(-(self.r**2)*(y_link_f_r -1)*y_link_f_r + 3*self.r*(y_link_f_r + 1)*y_link_f_r + (y_link_f_r - 1)*(y_link_f_r + 1)**2)
/ ((link_f**3)*(y_link_f_r + 1)**3))
d3lik_dlink3 = censored + uncensored
return d3lik_dlink3
def dlogpdf_link_dr(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of the log-likelihood function at y given f, w.r.t shape parameter
.. math::
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
# c = Y_metadata['censored']
# c = np.zeros((y.shape[0],))
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
link_f = inv_link_f #FIXME: Change names consistently...
y_link_f = y/link_f
log_y_link_f = np.log(y) - np.log(link_f)
y_link_f_r = y_link_f**self.r
#In terms of link_f
censored = c*(-y_link_f_r*log_y_link_f/(1 + y_link_f_r))
uncensored = (1-c)*(1./self.r + np.log(y) - np.log(link_f) - (2*y_link_f_r*log_y_link_f) / (1 + y_link_f_r))
dlogpdf_dr = censored + uncensored
return dlogpdf_dr
def dlogpdf_dlink_dr(self, inv_link_f, y, Y_metadata=None):
"""
Derivative of the dlogpdf_dlink w.r.t shape parameter
.. math::
:param inv_link_f: latent variables inv_link_f
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array
"""
# c = np.zeros((y.shape[0],))
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
link_f = inv_link_f
y_link_f = y/link_f
y_link_f_r = y_link_f**self.r
log_y_link_f = np.log(y) - np.log(link_f)
#In terms of link_f
censored = c*(y_link_f_r*(y_link_f_r + self.r*log_y_link_f + 1)/(link_f*(y_link_f_r + 1)**2))
uncensored = (1-c)*(y_link_f**(2*self.r) + 2*self.r*y_link_f_r*log_y_link_f - 1) / (link_f*(1 + y_link_f_r)**2)
# dlogpdf_dlink_dr = uncensored
dlogpdf_dlink_dr = censored + uncensored
return dlogpdf_dlink_dr
def d2logpdf_dlink2_dr(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t shape parameter
.. math::
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array
"""
# c = Y_metadata['censored']
# c = np.zeros((y.shape[0],))
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
link_f = inv_link_f
y_link_f = y/link_f
y_link_f_r = y_link_f**self.r
log_y_link_f = np.log(y) - np.log(link_f)
#In terms of link_f
y_link_f_2r = y_link_f**(2*self.r)
denom2 = (link_f**2)*(1 + y_link_f_r)**2
denom3 = (link_f**2)*(1 + y_link_f_r)**3
censored = c*(-((y_link_f_r + self.r + 1)*y_link_f_r)/denom2
-(self.r*(y_link_f_r + self.r + 1)*y_link_f_r*log_y_link_f)/denom2
-(self.r*y_link_f_r*(y_link_f_r*log_y_link_f + 1))/denom2
+(2*self.r*(y_link_f_r + self.r + 1)*y_link_f_2r*log_y_link_f)/denom3
)
uncensored = (1-c)*(-(2*self.r*y_link_f_r + y_link_f_2r - 1)/denom2
-(self.r*(2*y_link_f_r + 2*self.r*y_link_f_r*log_y_link_f + 2*y_link_f_2r*log_y_link_f)/denom2)
+(2*self.r*(2*self.r*y_link_f_r + y_link_f_2r - 1)*y_link_f_r*log_y_link_f)/denom3
)
d2logpdf_dlink2_dr = censored + uncensored
return d2logpdf_dlink2_dr
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dtheta[0, :, :] = self.dlogpdf_link_dr(f, y, Y_metadata=Y_metadata)
return dlogpdf_dtheta
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dlink_dtheta[0, :, :] = self.dlogpdf_dlink_dr(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dtheta
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
d2logpdf_dlink2_dtheta[0,:, :] = self.d2logpdf_dlink2_dr(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dtheta
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
self.r.gradient = grads[0]
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
#rs = np.ones_like(gp)*self.r
#scales = np.ones_like(gp)*np.sqrt(self.sigma2)
#Ysim = sp.stats.fisk.rvs(rs, scale=self.gp_link.transf(gp))
Ysim = np.array([sp.stats.fisk.rvs(self.r, loc=0, scale=self.gp_link.transf(f)) for f in gp])
#np.random.fisk(self.gp_link.transf(gp), c=self.r)
return Ysim.reshape(orig_shape)

GPy/likelihoods/weibull.py (new file)

@ -0,0 +1,322 @@
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats, special
import scipy as sp
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from . import link_functions
from .likelihood import Likelihood
class Weibull(Likelihood):
"""
Implementing Weibull likelihood function ...
"""
def __init__(self, gp_link=None, beta=1.):
if gp_link is None:
#Parameterised not as link_f but as f
# gp_link = link_functions.Identity()
#Parameterised as link_f
gp_link = link_functions.Log()
super(Weibull, self).__init__(gp_link, name='Weibull')
self.r = Param('r_weibull_shape', float(beta), Logexp())
self.link_parameter(self.r)
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
c = np.zeros((link_f.shape[0],))
# log_objective = np.log(self.r) + (self.r - 1) * np.log(y) - link_f - (np.exp(-link_f) * (y ** self.r))
# log_objective = stats.weibull_min.pdf(y,c=self.beta,loc=link_f,scale=1.)
log_objective = self.logpdf_link(link_f, y, Y_metadata)
return np.exp(log_objective)
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
.. math::
\\ln p(y_{i}|\\lambda(f_{i}), z_{i}) = (1 - z_{i})\\left[\\ln r - \\ln \\lambda(f_{i}) + (r - 1)\\ln y_{i} - \\frac{y_{i}^{r}}{\\lambda(f_{i})}\\right] - z_{i}\\frac{y_{i}^{r}}{\\lambda(f_{i})}
:param link_f: latent variables (link(f))
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: likelihood evaluated for this point
:rtype: float
"""
# alpha = self.gp_link.transf(gp)*self.beta sum(log(a) + (a-1).*log(y)- f - exp(-f).*y.^a)
# return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
# uncensored = (1-c)* (np.log(self.r) + (self.r - 1) * np.log(y) - link_f - (np.exp(-link_f) * (y ** self.r)))
# censored = (-c)*np.exp(-link_f)*(y**self.r)
uncensored = (1-c)*( np.log(self.r)-np.log(link_f)+(self.r-1)*np.log(y) - y**self.r/link_f)
censored = -c*y**self.r/link_f
log_objective = uncensored + censored
return log_objective
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
.. math::
\\frac{d \\ln p(y_{i}|\\lambda(f_{i}), z_{i})}{d\\lambda(f)} = (1 - z_{i})\\left[-\\frac{1}{\\lambda(f_{i})} + \\frac{y_{i}^{r}}{\\lambda(f_{i})^{2}}\\right] + z_{i}\\frac{y_{i}^{r}}{\\lambda(f_{i})^{2}}
:param link_f: latent variables (f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
"""
# grad = (1. - self.beta) / (y - link_f)
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
# uncensored = (1-c)* ( -1 + np.exp(-link_f)*(y ** self.r))
# censored = c*np.exp(-link_f)*(y**self.r)
uncensored = (1-c)*(-1/link_f + y**self.r/link_f**2)
censored = c*y**self.r/link_f**2
grad = uncensored + censored
return grad
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
The hessian will be 0 unless i == j
.. math::
\\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}), z_{i})}{d\\lambda(f)^{2}} = (1 - z_{i})\\left[\\frac{1}{\\lambda(f_{i})^{2}} - \\frac{2 y_{i}^{r}}{\\lambda(f_{i})^{3}}\\right] - z_{i}\\frac{2 y_{i}^{r}}{\\lambda(f_{i})^{3}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
.. Note::
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
# hess = (self.beta - 1.) / (y - link_f)**2
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
# uncensored = (1-c)* (-(y ** self.r) * np.exp(-link_f))
# censored = -c*np.exp(-link_f)*y**self.r
uncensored = (1-c)*(1/link_f**2 -2*y**self.r/link_f**3)
censored = -c*2*y**self.r/link_f**3
hess = uncensored + censored
# hess = -(y ** self.r) * np.exp(-link_f)
return hess
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
.. math::
\\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}), z_{i})}{d\\lambda(f)^{3}} = (1 - z_{i})\\left[-\\frac{2}{\\lambda(f_{i})^{3}} + \\frac{6 y_{i}^{r}}{\\lambda(f_{i})^{4}}\\right] + z_{i}\\frac{6 y_{i}^{r}}{\\lambda(f_{i})^{4}}
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
# d3lik_dlink3 = (1. - self.beta) / (y - link_f)**3
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
# uncensored = (1-c)* ((y ** self.r) * np.exp(-link_f))
# censored = c*np.exp(-link_f)*y**self.r
uncensored = (1-c)*(-2/link_f**3+ 6*y**self.r/link_f**4)
censored = c*6*y**self.r/link_f**4
d3lik_dlink3 = uncensored + censored
# d3lik_dlink3 = (y ** self.r) * np.exp(-link_f)
return d3lik_dlink3
def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None):
return np.zeros(self.size)
def dlogpdf_link_dr(self, inv_link_f, y, Y_metadata=None):
"""
Gradient of the log-likelihood function at y given f, w.r.t shape parameter
.. math::
:param inv_link_f: latent variables link(f)
:type inv_link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
c = np.zeros_like(y)
link_f = inv_link_f
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
uncensored = (1-c)* (1./self.r + np.log(y) - y**self.r*np.log(y)/link_f)
censored = (-c*y**self.r*np.log(y)/link_f)
dlogpdf_dr = uncensored + censored
return dlogpdf_dr
def dlogpdf_dlink_dr(self, inv_link_f, y, Y_metadata=None):
"""
First order derivative derivative of loglikelihood wrt r:shape parameter
:param link_f: latent variables link(f)
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param Y_metadata: includes censoring information in dictionary key 'censored'
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
# dlogpdf_dlink_dr = self.beta * y**(self.beta - 1) * np.exp(-link_f)
# dlogpdf_dlink_dr = np.exp(-link_f) * (y ** self.r) * np.log(y)
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
link_f = inv_link_f
# uncensored = (1-c)*(np.exp(-link_f)* (y ** self.r) * np.log(y))
# censored = c*np.exp(-link_f)*(y**self.r)*np.log(y)
uncensored = (1-c)*(y**self.r*np.log(y)/link_f**2)
censored = c*(y**self.r*np.log(y)/link_f**2)
dlogpdf_dlink_dr = uncensored + censored
return dlogpdf_dlink_dr
def d2logpdf_dlink2_dr(self, link_f, y, Y_metadata=None):
"""
Derivative of hessian of loglikelihood wrt r-shape parameter.
:param link_f:
:param y:
:param Y_metadata:
:return:
"""
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
# uncensored = (1-c)*( -np.exp(-link_f)* (y ** self.r) * np.log(y))
# censored = -c*np.exp(-link_f)*(y**self.r)*np.log(y)
uncensored = (1-c)*-2*y**self.r*np.log(y)/link_f**3
censored = c*-2*y**self.r*np.log(y)/link_f**3
d2logpdf_dlink_dr = uncensored + censored
return d2logpdf_dlink_dr
def d3logpdf_dlink3_dr(self, link_f, y, Y_metadata=None):
"""
:param link_f:
:param y:
:param Y_metadata:
:return:
"""
c = np.zeros_like(y)
if Y_metadata is not None and 'censored' in Y_metadata.keys():
c = Y_metadata['censored']
uncensored = (1-c)* ((y**self.r)*np.exp(-link_f)*np.log1p(y))
censored = c*np.exp(-link_f)*(y**self.r)*np.log(y)
d3logpdf_dlink3_dr = uncensored + censored
return d3logpdf_dlink3_dr
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
"""
:param f:
:param y:
:param Y_metadata:
:return:
"""
dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dtheta[0, :, :] = self.dlogpdf_link_dr(f, y, Y_metadata=Y_metadata)
return dlogpdf_dtheta
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
"""
:param f:
:param y:
:param Y_metadata:
:return:
"""
dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
dlogpdf_dlink_dtheta[0, :, :] = self.dlogpdf_dlink_dr(f, y, Y_metadata)
return dlogpdf_dlink_dtheta
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
"""
:param f:
:param y:
:param Y_metadata:
:return:
"""
d2logpdf_dlink_dtheta2 = np.zeros((self.size, f.shape[0], f.shape[1]))
d2logpdf_dlink_dtheta2[0, :, :] = self.d2logpdf_dlink2_dr(f, y, Y_metadata)
return d2logpdf_dlink_dtheta2
def update_gradients(self, grads):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
self.r.gradient = grads[0]
def samples(self, gp, Y_metadata=None):
"""
Returns a set of samples of observations conditioned on a given value of latent variable f.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
weibull_samples = np.array([sp.stats.weibull_min.rvs(self.r, loc=0, scale=self.gp_link.transf(f)) for f in gp])
return weibull_samples.reshape(orig_shape)
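A hypothetical end-to-end sketch of the new Weibull observation model combined with Laplace inference (data and settings illustrative, not taken from the diff):

import numpy as np
import GPy

X = np.random.rand(50, 1)
Y = np.random.weibull(1.5, size=(50, 1)) + 1e-6   # strictly positive targets
lik = GPy.likelihoods.Weibull(beta=1.5)
laplace = GPy.inference.latent_function_inference.Laplace()
m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1), likelihood=lik, inference_method=laplace)
m.optimize(messages=False)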


@ -38,3 +38,9 @@ class Constant(Mapping):
def gradients_X(self, dL_dF, X):
return np.zeros_like(X)
def to_dict(self):
input_dict = super(Constant, self)._to_dict()
input_dict["class"] = "GPy.mappings.Constant"
input_dict["value"] = self.C.values[0]
return input_dict


@ -19,8 +19,7 @@ class Identity(Mapping):
def gradients_X(self, dL_dF, X):
return dL_dF
def to_dict(self):
input_dict = super(Identity, self)._to_dict()
input_dict["class"] = "GPy.mappings.Identity"
return input_dict


@ -37,3 +37,21 @@ class Linear(Mapping):
def gradients_X(self, dL_dF, X):
return np.dot(dL_dF, self.A.T)
def to_dict(self):
input_dict = super(Linear, self)._to_dict()
input_dict["class"] = "GPy.mappings.Linear"
input_dict["A"] = self.A.values.tolist()
return input_dict
@staticmethod
def _from_dict(mapping_class, input_dict):
import copy
input_dict = copy.deepcopy(input_dict)
A = np.array(input_dict.pop('A'))
l = Linear(**input_dict)
l.unlink_parameter(l.A)
l.update_model(False)
l.A = Param('A', A)
l.link_parameter(l.A)
return l


@ -35,7 +35,7 @@ class MLP(Mapping):
# Backpropagation to hidden layer.
dL_dact = np.dot(dL_dF, self.W2.T)
dL_dlayer1 = dL_dact / np.square(np.cosh(layer1))
dL_dlayer1 = dL_dact * (1 - np.square(activations))
# Finally, evaluate the first-layer gradients.
self.W1.gradient = np.dot(X.T,dL_dlayer1)
@ -47,7 +47,7 @@ class MLP(Mapping):
# Backpropagation to hidden layer.
dL_dact = np.dot(dL_dF, self.W2.T)
dL_dlayer1 = dL_dact / np.square(np.cosh(layer1))
dL_dlayer1 = dL_dact * (1 - np.square(activations))
return np.dot(dL_dlayer1, self.W1.T)
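The replacement in both hunks uses the identity 1/cosh(z)^2 = 1 - tanh(z)^2, so the backpropagated gradient can reuse the stored tanh activations instead of recomputing cosh of the pre-activations; the two expressions are mathematically equivalent:

import numpy as np

z = np.random.randn(4)
assert np.allclose(1./np.cosh(z)**2, 1. - np.tanh(z)**2)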


@ -9,6 +9,7 @@ from .gplvm import GPLVM
from .bcgplvm import BCGPLVM
from .sparse_gplvm import SparseGPLVM
from .warped_gp import WarpedGP
from .input_warped_gp import InputWarpedGP
from .bayesian_gplvm import BayesianGPLVM
from .mrd import MRD
from .gradient_checker import GradientChecker, HessianChecker, SkewChecker


@ -4,6 +4,7 @@
from ..core import GP
from .. import likelihoods
from .. import kern
import numpy as np
from ..inference.latent_function_inference.expectation_propagation import EP
class GPClassification(GP):
@ -27,3 +28,23 @@ class GPClassification(GP):
likelihood = likelihoods.Bernoulli()
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), mean_function=mean_function, name='gp_classification')
@staticmethod
def from_gp(gp):
from copy import deepcopy
gp = deepcopy(gp)
return GPClassification(gp.X, gp.Y, gp.kern, gp.likelihood, gp.inference_method, gp.mean_function, name='gp_classification')
def to_dict(self, save_data=True):
model_dict = super(GPClassification,self).to_dict(save_data)
model_dict["class"] = "GPy.models.GPClassification"
return model_dict
@staticmethod
def from_dict(input_dict, data=None):
import GPy
m = GPy.core.model.Model.from_dict(input_dict, data)
return GPClassification.from_gp(m)
def save_model(self, output_filename, compress=True, save_data=True):
self._save_model(output_filename, compress=True, save_data=True)


@ -35,3 +35,23 @@ class GPRegression(GP):
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)
@staticmethod
def from_gp(gp):
from copy import deepcopy
gp = deepcopy(gp)
return GPRegression(gp.X, gp.Y, gp.kern, gp.Y_metadata, gp.normalizer, gp.likelihood.variance.values, gp.mean_function)
def to_dict(self, save_data=True):
model_dict = super(GPRegression,self).to_dict(save_data)
model_dict["class"] = "GPy.models.GPRegression"
return model_dict
@staticmethod
def _from_dict(input_dict, data=None):
import GPy
input_dict["class"] = "GPy.core.GP"
m = GPy.core.GP.from_dict(input_dict, data)
return GPRegression.from_gp(m)
def save_model(self, output_filename, compress=True, save_data=True):
self._save_model(output_filename, compress=True, save_data=True)
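A minimal sketch of the intended save/load round trip (file names illustrative; assumes the matching Model.load_model helper that accompanies _save_model in this release):

import numpy as np
import GPy

X = np.random.rand(20, 1)
Y = np.sin(X) + 0.05*np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y, GPy.kern.Matern32(1))
m.optimize()
m.save_model('gp_regression', compress=True, save_data=True)     # assumed to write gp_regression.zip when compressed

m2 = GPy.models.GPRegression.load_model('gp_regression.zip')
assert np.allclose(m.predict(X)[0], m2.predict(X)[0])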


@ -0,0 +1,149 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from ..util.input_warping_functions import KumarWarping
from .. import kern
class InputWarpedGP(GP):
"""Input Warped GP
This defines a GP model that applies a warping function to the Input.
By default, it uses Kumar Warping (CDF of Kumaraswamy distribution)
Parameters
----------
X : array_like, shape = (n_samples, n_features) for input data
Y : array_like, shape = (n_samples, 1) for output data
kernel : object, optional
An instance of kernel function defined in GPy.kern
Default to Matern 32
warping_function : object, optional
An instance of warping function defined in GPy.util.input_warping_functions
Default to KumarWarping
warping_indices : list of int, optional
An list of indices of which features in X should be warped.
It is used in the Kumar warping function
normalizer : bool, optional
A bool variable indicates whether to normalize the output
Xmin : list of float, optional
The min values for every feature in X
It is used in the Kumar warping function
Xmax : list of float, optional
The max values for every feature in X
It is used in the Kumar warping function
epsilon : float, optional
We normalize X to [0+e, 1-e]. If not given, using the default value defined in KumarWarping function
Attributes
----------
X_untransformed : array_like, shape = (n_samples, n_features)
A copy of original input X
X_warped : array_like, shape = (n_samples, n_features)
Input data after warping
warping_function : object, optional
An instance of warping function defined in GPy.util.input_warping_functions
Default to KumarWarping
Notes
-----
Kumar warping uses the CDF of Kumaraswamy distribution. More on the Kumaraswamy distribution can be found at the
wiki page: https://en.wikipedia.org/wiki/Kumaraswamy_distribution
References
----------
Snoek, J.; Swersky, K.; Zemel, R. S. & Adams, R. P.
Input Warping for Bayesian Optimization of Non-stationary Functions
preprint arXiv:1402.0929, 2014
"""
def __init__(self, X, Y, kernel=None, normalizer=False, warping_function=None, warping_indices=None, Xmin=None, Xmax=None, epsilon=None):
if X.ndim == 1:
X = X.reshape(-1, 1)
self.X_untransformed = X.copy()
if kernel is None:
kernel = kern.sde_Matern32(X.shape[1], variance=1.)
self.kernel = kernel
if warping_function is None:
self.warping_function = KumarWarping(self.X_untransformed, warping_indices, epsilon, Xmin, Xmax)
else:
self.warping_function = warping_function
self.X_warped = self.transform_data(self.X_untransformed)
likelihood = likelihoods.Gaussian()
super(InputWarpedGP, self).__init__(self.X_warped, Y, likelihood=likelihood, kernel=kernel, normalizer=normalizer)
# Add the parameters in the warping function to the model parameters hierarchy
self.link_parameter(self.warping_function)
def parameters_changed(self):
"""Update the gradients of parameters for warping function
This method is called when having new values of parameters for warping function, kernels
and other parameters in a normal GP
"""
# using the warped X to update
self.X = self.transform_data(self.X_untransformed)
super(InputWarpedGP, self).parameters_changed()
# the gradient of log likelihood w.r.t. input AFTER warping is a product of dL_dK and dK_dX
dL_dX = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X)
self.warping_function.update_grads(self.X_untransformed, dL_dX)
def transform_data(self, X, test_data=False):
"""Apply warping_function to some Input data
Parameters
----------
X : array_like, shape = (n_samples, n_features)
test_data: bool, optional
Default to False, should set to True when transforming test data
"""
return self.warping_function.f(X, test_data)
def log_likelihood(self):
"""Compute the marginal log likelihood
For input warping, just use the normal GP log likelihood
"""
return GP.log_likelihood(self)
def predict(self, Xnew):
"""Prediction on the new data
Parameters
----------
Xnew : array_like, shape = (n_samples, n_features)
The test data.
Returns
-------
mean : array_like, shape = (n_samples, output.dim)
Posterior mean at the location of Xnew
var : array_like, shape = (n_samples, 1)
Posterior variance at the location of Xnew
"""
Xnew_warped = self.transform_data(Xnew, test_data=True)
mean, var = super(InputWarpedGP, self).predict(Xnew_warped, kern=self.kernel, full_cov=False)
return mean, var
if __name__ == '__main__':
X = np.random.randn(100, 1)
Y = np.sin(X) + np.random.randn(100, 1)*0.05
m = InputWarpedGP(X, Y)
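    # Illustrative continuation of the demo: fit the model and predict inside the training range.
    m.optimize(messages=False)
    X_test = np.linspace(X.min(), X.max(), 50).reshape(-1, 1)
    mean, var = m.predict(X_test)
    print(m)    # the printed model includes the learned Kumar warping parameters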


@ -65,7 +65,7 @@ def plot_ARD(kernel, filtering=None, legend=False, canvas=None, **kwargs):
if canvas is None:
canvas, kwargs = pl().new_canvas(xlim=(-.5, kernel._effective_input_dim-.5), xlabel='input dimension', ylabel='sensitivity', **kwargs)
canvas, kwargs = pl().new_canvas(xlim=(-.5, kernel._effective_input_dim-.5), xlabel='input dimension', ylabel='ard contribution', **kwargs)
for i in range(ard_params.shape[0]):
if parts[i].name in filtering:


@ -0,0 +1,147 @@
import numpy as np
import unittest
import GPy
from GPy.models import GradientChecker
fixed_seed = 10
from nose.tools import with_setup, nottest
# this file will contain some high level tests, this is not unit testing, but will give us a higher level estimate
# if things are going well under the hood.
class TestObservationModels(unittest.TestCase):
def setUp(self):
np.random.seed(fixed_seed)
self.N = 100
self.D = 2
self.X = np.random.rand(self.N, self.D)
self.real_noise_std = 0.05
noise = np.random.randn(*self.X[:, 0].shape) * self.real_noise_std
self.Y = (np.sin(self.X[:, 0] * 2 * np.pi) + noise)[:, None]
self.num_points = self.X.shape[0]
self.f = np.random.rand(self.N, 1)
self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
# self.binary_Y[self.binary_Y == 0.0] = -1.0
self.positive_Y = np.exp(self.Y.copy())
self.Y_noisy = self.Y.copy()
self.Y_verynoisy = self.Y.copy()
self.Y_noisy[75] += 1.3
self.init_var = 0.15
self.deg_free = 4.
censored = np.zeros_like(self.Y)
random_inds = np.random.choice(self.N, int(self.N / 2), replace=True)
censored[random_inds] = 1
self.Y_metadata = dict()
self.Y_metadata['censored'] = censored
self.kernel1 = GPy.kern.RBF(self.X.shape[1]) + GPy.kern.White(self.X.shape[1])
def tearDown(self):
self.Y = None
self.X = None
self.binary_Y =None
self.positive_Y = None
self.kernel1 = None
@with_setup(setUp, tearDown)
def testEPClassification(self):
bernoulli = GPy.likelihoods.Bernoulli()
laplace_inf = GPy.inference.latent_function_inference.Laplace()
ep_inf_alt = GPy.inference.latent_function_inference.EP(ep_mode='alternated')
ep_inf_nested = GPy.inference.latent_function_inference.EP(ep_mode='nested')
ep_inf_fractional = GPy.inference.latent_function_inference.EP(ep_mode='nested', eta=0.9)
m1 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=laplace_inf)
m1.randomize()
m2 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_alt)
m2.randomize()
m3 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_nested)
m3.randomize()
#
m4 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_fractional)
m4.randomize()
optimizer = 'bfgs'
#do gradcheck here ...
# self.assertTrue(m1.checkgrad())
# self.assertTrue(m2.checkgrad())
# self.assertTrue(m3.checkgrad())
# self.assertTrue(m4.checkgrad())
m1.optimize(optimizer=optimizer, max_iters=300)
m2.optimize(optimizer=optimizer, max_iters=300)
m3.optimize(optimizer=optimizer, max_iters=300)
m4.optimize(optimizer=optimizer, max_iters=300)
# taking laplace predictions as the ground truth
probs_mean_lap, probs_var_lap = m1.predict(self.X)
probs_mean_ep_alt, probs_var_ep_alt = m2.predict(self.X)
probs_mean_ep_nested, probs_var_ep_nested = m3.predict(self.X)
# For simple single-dimensional data, the marginal likelihoods of the Laplace and EP approximations should not be far apart.
self.assertAlmostEqual(m1.log_likelihood(), m2.log_likelihood(),delta=1)
self.assertAlmostEqual(m1.log_likelihood(), m3.log_likelihood(), delta=1)
self.assertAlmostEqual(m1.log_likelihood(), m4.log_likelihood(), delta=5)
GPy.util.classification.conf_matrix(probs_mean_lap, self.binary_Y)
GPy.util.classification.conf_matrix(probs_mean_ep_alt, self.binary_Y)
GPy.util.classification.conf_matrix(probs_mean_ep_nested, self.binary_Y)
@nottest
def rmse(self, Y, Ystar):
return np.sqrt(np.mean((Y - Ystar) ** 2))
@with_setup(setUp, tearDown)
def test_EP_with_StudentT(self):
studentT = GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.init_var)
laplace_inf = GPy.inference.latent_function_inference.Laplace()
ep_inf_alt = GPy.inference.latent_function_inference.EP(ep_mode='alternated')
ep_inf_nested = GPy.inference.latent_function_inference.EP(ep_mode='nested')
ep_inf_frac = GPy.inference.latent_function_inference.EP(ep_mode='nested', eta=0.7)
m1 = GPy.core.GP(self.X.copy(), self.Y_noisy.copy(), kernel=self.kernel1.copy(), likelihood=studentT.copy(), inference_method=laplace_inf)
# optimize
m1['.*white'].constrain_fixed(1e-5)
m1.randomize()
m2 = GPy.core.GP(self.X.copy(), self.Y_noisy.copy(), kernel=self.kernel1.copy(), likelihood=studentT.copy(), inference_method=ep_inf_alt)
m2['.*white'].constrain_fixed(1e-5)
# m2.constrain_bounded('.*t_scale2', 0.001, 10)
m2.randomize()
# m3 = GPy.core.GP(self.X, self.Y_noisy.copy(), kernel=self.kernel1, likelihood=studentT.copy(), inference_method=ep_inf_nested)
# m3['.*white'].constrain_fixed(1e-5)
# # m3.constrain_bounded('.*t_scale2', 0.001, 10)
# m3.randomize()
optimizer='bfgs'
m1.optimize(optimizer=optimizer,max_iters=400)
m2.optimize(optimizer=optimizer, max_iters=400)
# m3.optimize(optimizer=optimizer, max_iters=500)
self.assertAlmostEqual(m1.log_likelihood(), m2.log_likelihood(),delta=200)
# self.assertAlmostEqual(m1.log_likelihood(), m3.log_likelihood(), 3)
preds_mean_lap, preds_var_lap = m1.predict(self.X)
preds_mean_alt, preds_var_alt = m2.predict(self.X)
# preds_mean_nested, preds_var_nested = m3.predict(self.X)
rmse_lap = self.rmse(preds_mean_lap, self.Y)
rmse_alt = self.rmse(preds_mean_alt, self.Y)
# rmse_nested = self.rmse(preds_mean_nested, self.Y_noisy)
if rmse_alt > rmse_lap:
self.assertAlmostEqual(rmse_lap, rmse_alt, delta=1.5)
# m3.optimize(optimizer=optimizer, max_iters=500)
if __name__ == "__main__":
unittest.main()

View file

@ -61,6 +61,18 @@ class InferenceGPEP(unittest.TestCase):
Y = lik.samples(f).reshape(-1,1)
return X, Y
def genNoisyData(self):
np.random.seed(1)
X = np.random.rand(100,1)
self.real_std = 0.1
noise = np.random.randn(*X[:, 0].shape)*self.real_std
Y = (np.sin(X[:, 0]*2*np.pi) + noise)[:, None]
self.f = np.random.rand(X.shape[0],1)
Y_extra_noisy = Y.copy()
Y_extra_noisy[50] += 4.
# Y_extra_noisy[80:83] -= 2.
return X, Y, Y_extra_noisy
def test_inference_EP(self):
from paramz import ObsAr
X, Y = self.genData()
@ -73,11 +85,45 @@ class InferenceGPEP(unittest.TestCase):
inference_method=inf,
likelihood=lik)
K = self.model.kern.K(X)
- mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self.model.inference_method.expectation_propagation(K, ObsAr(Y), lik, None)
- v_tilde = mu_tilde * tau_tilde
- p, m, d = self.model.inference_method._inference(K, tau_tilde, v_tilde, lik, Y_metadata=None, Z_tilde=log_Z_tilde.sum())
- p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(k, X, lik, mu_tilde[:,None], mean_function=None, variance=1./tau_tilde, K=K, Z_tilde=log_Z_tilde.sum() + np.sum(- 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)))
+ post_params, ga_approx, cav_params, log_Z_tilde = self.model.inference_method.expectation_propagation(K, ObsAr(Y), lik, None)
+ mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
+ p, m, d = self.model.inference_method._inference(Y, K, ga_approx, cav_params, lik, Y_metadata=None, Z_tilde=log_Z_tilde)
+ p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(k, X, lik, mu_tilde[:,None], mean_function=None, variance=1./ga_approx.tau, K=K, Z_tilde=log_Z_tilde + np.sum(- 0.5*np.log(ga_approx.tau) + 0.5*(ga_approx.v*ga_approx.v*1./ga_approx.tau)))
assert (np.sum(np.array([m - m0,
np.sum(d['dL_dK'] - d0['dL_dK']),
np.sum(d['dL_dthetaL'] - d0['dL_dthetaL']),
np.sum(d['dL_dm'] - d0['dL_dm']),
np.sum(p._woodbury_vector - p0._woodbury_vector),
np.sum(p.woodbury_inv - p0.woodbury_inv)])) < 1e6)
# NOTE: adding a test like the one above for a parameterized likelihood. The test above
# only covers the probit likelihood, which has no tunable hyperparameters, which is why
# the dL_dthetaL term in the dictionary of gradients is always zero. So here we repeat the test for the
# Student-t likelihood and the heteroscedastic Gaussian noise case. This test simply checks whether the posterior
# and the gradients of the log marginal are roughly the same for inference through EP and for exact Gaussian inference using
# the Gaussian approximation of the individual likelihood site terms. For the probit likelihood it is possible to
# calculate the moments analytically, but for other likelihoods we need numerical quadrature,
# and errors might creep in from the quadrature implementation.
def test_inference_EP_non_classification(self):
from paramz import ObsAr
X, Y, Y_extra_noisy = self.genNoisyData()
deg_freedom = 5.
init_noise_var = 0.08
lik_studentT = GPy.likelihoods.StudentT(deg_free=deg_freedom, sigma2=init_noise_var)
# like_gaussian_noise = GPy.likelihoods.MixedNoise()
k = GPy.kern.RBF(1, variance=2., lengthscale=1.1)
ep_inf_alt = GPy.inference.latent_function_inference.expectation_propagation.EP(max_iters=4, delta=0.5)
# ep_inf_nested = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode='nested', max_iters=100, delta=0.5)
m = GPy.core.GP(X=X,Y=Y_extra_noisy,kernel=k,likelihood=lik_studentT,inference_method=ep_inf_alt)
K = m.kern.K(X)
post_params, ga_approx, cav_params, log_Z_tilde = m.inference_method.expectation_propagation(K, ObsAr(Y_extra_noisy), lik_studentT, None)
mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
p, m, d = m.inference_method._inference(Y_extra_noisy, K, ga_approx, cav_params, lik_studentT, Y_metadata=None, Z_tilde=log_Z_tilde)
p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, ep_inf_alt).inference(k, X,lik_studentT ,mu_tilde[:,None], mean_function=None, variance=1./ga_approx.tau, K=K, Z_tilde=log_Z_tilde + np.sum(- 0.5*np.log(ga_approx.tau) + 0.5*(ga_approx.v*ga_approx.v*1./ga_approx.tau)))
assert (np.sum(np.array([m - m0,
np.sum(d['dL_dK'] - d0['dL_dK']),

View file

@ -123,6 +123,11 @@ class TestNoiseModels(object):
self.var = 0.2
self.deg_free = 4.0
censored = np.zeros_like(self.Y)
random_inds = np.random.choice(self.N, int(self.N / 2), replace=True)
censored[random_inds] = 1
self.Y_metadata = dict()
self.Y_metadata['censored'] = censored
#Make a bigger step as lower bound can be quite curved
self.step = 1e-4
@ -274,6 +279,20 @@ class TestNoiseModels(object):
"Y_metadata": {'trials': self.ns}, "Y_metadata": {'trials': self.ns},
"laplace": True, "laplace": True,
}, },
"loglogistic_censored": {
"model": GPy.likelihoods.LogLogistic(),
"link_f_constraints": [self.constrain_positive],
"Y": self.positive_Y,
"Y_metadata": self.Y_metadata,
"laplace": True
},
"weibull_censored": {
"model": GPy.likelihoods.Weibull(),
"link_f_constraints": [self.constrain_positive],
"Y": self.positive_Y,
"Y_metadata": self.Y_metadata,
"laplace": True
}
#,
#GAMMA needs some work!"Gamma_default": {
#"model": GPy.likelihoods.Gamma(),

View file

@ -399,6 +399,68 @@ class MiscTests(unittest.TestCase):
m.optimize()
print(m)
def test_input_warped_gp_identity(self):
"""
An InputWarpedGP with the identity warping function should be
equivalent to a standard GP.
"""
k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
m.optimize()
preds = m.predict(self.X)
warp_k = GPy.kern.RBF(1)
warp_f = GPy.util.input_warping_functions.IdentifyWarping()
warp_m = GPy.models.InputWarpedGP(self.X, self.Y, kernel=warp_k, warping_function=warp_f)
warp_m.optimize()
warp_preds = warp_m.predict(self.X)
np.testing.assert_almost_equal(preds, warp_preds, decimal=4)
def test_kumar_warping_gradient(self):
n_X = 100
np.random.seed(0)
X = np.random.randn(n_X, 2)
Y = np.sum(np.sin(X), 1).reshape(n_X, 1)
k1 = GPy.kern.Linear(2)
m1 = GPy.models.InputWarpedGP(X, Y, kernel=k1)
m1.randomize()
self.assertEquals(m1.checkgrad(), True)
k2 = GPy.kern.RBF(2)
m2 = GPy.models.InputWarpedGP(X, Y, kernel=k2)
m2.randomize()
m2.checkgrad()
self.assertEquals(m2.checkgrad(), True)
k3 = GPy.kern.Matern52(2)
m3 = GPy.models.InputWarpedGP(X, Y, kernel=k3)
m3.randomize()
m3.checkgrad()
self.assertEquals(m3.checkgrad(), True)
def test_kumar_warping_parameters(self):
np.random.seed(1)
X = np.random.rand(5, 2)
epsilon = 1e-6
# testing warping indices
warping_ind_1 = [0, 1, 2]
warping_ind_2 = [-1, 1, 2]
warping_ind_3 = [0, 1.5, 2]
self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, warping_ind_1)
self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, warping_ind_2)
self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, warping_ind_3)
# testing Xmin and Xmax
Xmin_1, Xmax_1 = None, [1, 1]
Xmin_2, Xmax_2 = [0, 0], None
Xmin_3, Xmax_3 = [0, 0, 0], [1, 1]
self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, [0, 1], epsilon, Xmin_1, Xmax_1)
self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, [0, 1], epsilon, Xmin_2, Xmax_2)
self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, [0, 1], epsilon, Xmin_3, Xmax_3)
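For reference, a hedged sketch of a construction that passes the validity checks exercised above, using the same KumarWarping API; the array values are illustrative only:
import numpy as np
import GPy

X = np.random.rand(5, 2)
warp = GPy.util.input_warping_functions.KumarWarping(
    X, warping_indices=[0, 1], epsilon=1e-6, Xmin=[0.0, 0.0], Xmax=[1.0, 1.0])
X_warped = warp.f(X)   # same shape as X; each selected column is mapped through 1 - (1 - x^a)^b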
def test_warped_gp_identity(self):
"""
A WarpedGP with the identity warping function should be

View file

@ -0,0 +1,39 @@
from __future__ import print_function, division
import numpy as np
import GPy
import warnings
from ..util.quad_integrate import quadgk_int, quadvgk
class QuadTests(np.testing.TestCase):
"""
Test file for checking the implementation of Gauss-Kronrod quadrature.
We take a function that can be integrated analytically and check whether the quadgk result agrees.
This lets us test how numerically accurate the quadrature implementation (native numpy or manual code) is.
"""
def setUp(self):
pass
def test_infinite_quad(self):
def f(x):
return np.exp(-0.5*x**2)*np.power(x,np.arange(3)[:,None])
quad_int_val = quadgk_int(f)
real_val = np.sqrt(np.pi * 2)
np.testing.assert_almost_equal(real_val, quad_int_val[0], decimal=7)
def test_finite_quad(self):
def f2(x):
return x**2
quad_int_val = quadvgk(f2, 1.,2.)
real_val = 7/3.
np.testing.assert_almost_equal(real_val, quad_int_val, decimal=5)
if __name__ == '__main__':
def f(x):
return np.exp(-0.5 * x ** 2) * np.power(x, np.arange(3)[:, None])
quad_int_val = quadgk_int(f)
real_val = np.sqrt(np.pi*2)
np.testing.assert_almost_equal(real_val, quad_int_val[0], decimal=7)
print(quadgk_int(f))

View file

@ -0,0 +1,202 @@
'''
Created on 20 April 2017
@author: pgmoren
'''
import unittest, itertools
#import cPickle as pickle
import pickle
import numpy as np
import tempfile
import GPy
from nose import SkipTest
import numpy as np
fixed_seed = 11
class Test(unittest.TestCase):
def test_serialize_deserialize_kernels(self):
k1 = GPy.kern.RBF(2, variance=1.0, lengthscale=[1.0,1.0], ARD=True)
k2 = GPy.kern.RatQuad(2, variance=2.0, lengthscale=1.0, power=2.0, active_dims = [0,1])
k3 = GPy.kern.Bias(2, variance=2.0, active_dims = [1,0])
k4 = GPy.kern.StdPeriodic(2, variance=2.0, lengthscale=1.0, period=1.0, active_dims = [1,1])
k5 = GPy.kern.Linear(2, variances=[2.0, 1.0], ARD=True, active_dims = [1,1])
k6 = GPy.kern.Exponential(2, variance=1., lengthscale=2)
k7 = GPy.kern.Matern32(2, variance=1.0, lengthscale=[1.0,3.0], ARD=True, active_dims = [1,1])
k8 = GPy.kern.Matern52(2, variance=2.0, lengthscale=[2.0,1.0], ARD=True, active_dims = [1,0])
k9 = GPy.kern.ExpQuad(2, variance=3.0, lengthscale=[1.0,2.0], ARD=True, active_dims = [0,1])
k10 = k1 + k1.copy() + k2 + k3 + k4 + k5 + k6
k11 = k1 * k2 * k2.copy() * k3 * k4 * k5
k12 = (k1 + k2) * (k3 + k4 + k5)
k13 = ((k1 + k2) * k3) + k4 + k5 * k7
k14 = ((k1 + k2) * k3) + k4 * k5 + k8
k15 = ((k1 * k2) * k3) + k4 * k5 + k8 + k9
k_list = [k1,k2,k3,k4,k5,k6,k7,k8,k9,k10,k11,k12,k13,k14,k15]
for kk in k_list:
kk_dict = kk.to_dict()
kk_r = GPy.kern.Kern.from_dict(kk_dict)
assert type(kk) == type(kk_r)
np.testing.assert_array_equal(kk[:], kk_r[:])
np.testing.assert_array_equal(np.array(kk.active_dims), np.array(kk_r.active_dims))
def test_serialize_deserialize_mappings(self):
m1 = GPy.mappings.Identity(3,2)
m2 = GPy.mappings.Constant(3,2,1)
m2_r = GPy.core.mapping.Mapping.from_dict(m2.to_dict())
np.testing.assert_array_equal(m2.C.values[:], m2_r.C.values[:])
m3 = GPy.mappings.Linear(3,2)
m3_r = GPy.core.mapping.Mapping.from_dict(m3.to_dict())
assert np.all(m3.A == m3_r.A)
m_list = [m1, m2, m3]
for mm in m_list:
mm_dict = mm.to_dict()
mm_r = GPy.core.mapping.Mapping.from_dict(mm_dict)
assert type(mm) == type(mm_r)
assert type(mm.input_dim) == type(mm_r.input_dim)
assert type(mm.output_dim) == type(mm_r.output_dim)
def test_serialize_deserialize_likelihoods(self):
l1 = GPy.likelihoods.Gaussian(GPy.likelihoods.link_functions.Identity(),variance=3.0)
l1_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l1.to_dict())
l2 = GPy.likelihoods.Bernoulli(GPy.likelihoods.link_functions.Probit())
l2_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l2.to_dict())
assert type(l1) == type(l1_r)
assert np.all(l1.variance == l1_r.variance)
assert type(l2) == type(l2_r)
def test_serialize_deserialize_normalizers(self):
n1 = GPy.util.normalizer.Standardize()
n1.scale_by(np.random.rand(10))
n1_r = GPy.util.normalizer._Norm.from_dict((n1.to_dict()))
assert type(n1) == type(n1_r)
assert np.all(n1.mean == n1_r.mean)
assert np.all(n1.std == n1_r.std)
def test_serialize_deserialize_link_functions(self):
l1 = GPy.likelihoods.link_functions.Identity()
l2 = GPy.likelihoods.link_functions.Probit()
l_list = [l1, l2]
for ll in l_list:
ll_dict = ll.to_dict()
ll_r = GPy.likelihoods.link_functions.GPTransformation.from_dict(ll_dict)
assert type(ll) == type(ll_r)
def test_serialize_deserialize_inference_methods(self):
e1 = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested")
e1.ga_approx_old = GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10))
e1._ep_approximation = []
e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.posteriorParams(np.random.rand(10),np.random.rand(100).reshape((10,10))))
e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10),np.random.rand(10)))
e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.cavityParams(10))
e1._ep_approximation[-1].v = np.random.rand(10)
e1._ep_approximation[-1].tau = np.random.rand(10)
e1._ep_approximation.append(np.random.rand(10))
e1_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e1.to_dict())
assert type(e1) == type(e1_r)
assert e1.epsilon==e1_r.epsilon
assert e1.eta==e1_r.eta
assert e1.delta==e1_r.delta
assert e1.always_reset==e1_r.always_reset
assert e1.max_iters==e1_r.max_iters
assert e1.ep_mode==e1_r.ep_mode
assert e1.parallel_updates==e1_r.parallel_updates
np.testing.assert_array_equal(e1.ga_approx_old.tau[:], e1_r.ga_approx_old.tau[:])
np.testing.assert_array_equal(e1.ga_approx_old.v[:], e1_r.ga_approx_old.v[:])
np.testing.assert_array_equal(e1._ep_approximation[0].mu[:], e1_r._ep_approximation[0].mu[:])
np.testing.assert_array_equal(e1._ep_approximation[0].Sigma[:], e1_r._ep_approximation[0].Sigma[:])
np.testing.assert_array_equal(e1._ep_approximation[1].tau[:], e1_r._ep_approximation[1].tau[:])
np.testing.assert_array_equal(e1._ep_approximation[1].v[:], e1_r._ep_approximation[1].v[:])
np.testing.assert_array_equal(e1._ep_approximation[2].tau[:], e1_r._ep_approximation[2].tau[:])
np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:])
np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:])
e2 = GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference()
e2_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e2.to_dict())
assert type(e2) == type(e2_r)
def test_serialize_deserialize_model(self):
np.random.seed(fixed_seed)
N = 20
Nhalf = int(N/2)
X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
kernel = GPy.kern.RBF(1)
likelihood = GPy.likelihoods.Bernoulli()
inference_method=GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested")
mean_function=None
m = GPy.core.GP(X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=inference_method, mean_function=mean_function, normalizer=True, name='gp_classification')
m.optimize()
m.save_model("temp_test_gp_with_data.json", compress=True, save_data=True)
m.save_model("temp_test_gp_without_data.json", compress=True, save_data=False)
m1_r = GPy.core.GP.load_model("temp_test_gp_with_data.json.zip")
m2_r = GPy.core.GP.load_model("temp_test_gp_without_data.json.zip", (X,Y))
import os
os.remove("temp_test_gp_with_data.json.zip")
os.remove("temp_test_gp_without_data.json.zip")
var = m.predict(X)[0]
var1_r = m1_r.predict(X)[0]
var2_r = m2_r.predict(X)[0]
np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())
def test_serialize_deserialize_inference_GPRegressor(self):
np.random.seed(fixed_seed)
N = 50
N_new = 50
D = 1
X = np.random.uniform(-3., 3., (N, 1))
Y = np.sin(X) + np.random.randn(N, D) * 0.05
X_new = np.random.uniform(-3., 3., (N_new, 1))
k = GPy.kern.RBF(input_dim=1, lengthscale=10)
m = GPy.models.GPRegression(X,Y,k)
m.optimize()
m.save_model("temp_test_gp_regressor_with_data.json", compress=True, save_data=True)
m.save_model("temp_test_gp_regressor_without_data.json", compress=True, save_data=False)
m1_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_with_data.json.zip")
m2_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_without_data.json.zip", (X,Y))
import os
os.remove("temp_test_gp_regressor_with_data.json.zip")
os.remove("temp_test_gp_regressor_without_data.json.zip")
Xp = np.random.uniform(size=(int(1e5),1))
Xp[:,0] = Xp[:,0]*15-5
_, var = m.predict(Xp)
_, var1_r = m1_r.predict(Xp)
_, var2_r = m2_r.predict(Xp)
np.testing.assert_array_equal(var.flatten(), var1_r.flatten())
np.testing.assert_array_equal(var.flatten(), var2_r.flatten())
def test_serialize_deserialize_inference_GPClassifier(self):
np.random.seed(fixed_seed)
N = 50
Nhalf = int(N/2)
X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
kernel = GPy.kern.RBF(1)
m = GPy.models.GPClassification(X, Y, kernel=kernel)
m.optimize()
m.save_model("temp_test_gp_classifier_with_data.json", compress=True, save_data=True)
m.save_model("temp_test_gp_classifier_without_data.json", compress=True, save_data=False)
m1_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_with_data.json.zip")
m2_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_without_data.json.zip", (X,Y))
import os
os.remove("temp_test_gp_classifier_with_data.json.zip")
os.remove("temp_test_gp_classifier_without_data.json.zip")
var = m.predict(X)[0]
var1_r = m1_r.predict(X)[0]
var2_r = m2_r.predict(X)[0]
np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
unittest.main()

View file

@ -17,3 +17,4 @@ from . import multioutput
from . import parallel
from . import functions
from . import cluster_with_offset
from . import input_warping_functions

View file

@ -0,0 +1,262 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.parameterization import Parameterized, Param
from ..core.parameterization.priors import LogGaussian
class InputWarpingFunction(Parameterized):
"""Abstract class for input warping functions
"""
def __init__(self, name):
super(InputWarpingFunction, self).__init__(name=name)
def f(self, X, test=False):
raise NotImplementedError
def fgrad_x(self, X):
raise NotImplementedError
def update_grads(self, X, dL_dW):
raise NotImplementedError
class IdentifyWarping(InputWarpingFunction):
"""The identity warping function, for testing"""
def __init__(self):
super(IdentifyWarping, self).__init__(name='input_warp_identity')
def f(self, X, test_data=False):
return X
def fgrad_X(self, X):
return np.zeros(X.shape)
def update_grads(self, X, dL_dW):
pass
class InputWarpingTest(InputWarpingFunction):
"""The identity warping function, for testing"""
def __init__(self):
super(InputWarpingTest, self).__init__(name='input_warp_test')
self.a = Param('a', 1.0)
self.set_prior(LogGaussian(0.0, 0.75))
self.link_parameter(self.a)
def f(self, X, test_data=False):
return X * self.a
def fgrad_X(self, X):
return np.ones(X.shape) * self.a
def update_grads(self, X, dL_dW):
self.a.gradient[:] = np.sum(dL_dW * X)
class KumarWarping(InputWarpingFunction):
"""Kumar Warping for input data
Parameters
----------
X : array_like, shape = (n_samples, n_features)
The input data that is going to be warped
warping_indices: list of int, optional
The features that are going to be warped
Default to warp all the features
epsilon: float, optional
Used to normalize input data to [0+e, 1-e]
Default to 1e-6
Xmin : list of float, Optional
The min values for each feature defined by users
Default to the train minimum
Xmax : list of float, Optional
The max values for each feature defined by users
Default to the train maximum
Attributes
----------
warping_indices: list of int
The features that are going to be warped
Default to warp all the features
warping_dim: int
The number of features to be warped
Xmin : list of float
The min values for each feature defined by users
Default to the train minimum
Xmax : list of float
The max values for each feature defined by users
Default to the train maximum
epsilon: float
Used to normalize input data to [0+e, 1-e]
Default to 1e-6
X_normalized : array_like, shape = (n_samples, n_features)
The normalized training X
scaling : list of float, length = n_features in X
Defined as 1.0 / (self.Xmax - self.Xmin)
params : list of Param
The list of all the parameters used in Kumar Warping
num_parameters: int
The number of parameters used in Kumar Warping
"""
def __init__(self, X, warping_indices=None, epsilon=None, Xmin=None, Xmax=None):
super(KumarWarping, self).__init__(name='input_warp_kumar')
if warping_indices is not None and np.max(warping_indices) > X.shape[1] -1:
raise ValueError("Kumar warping indices exceed feature dimension")
if warping_indices is not None and np.min(warping_indices) < 0:
raise ValueError("Kumar warping indices should be larger than 0")
if warping_indices is not None and np.any(list(map(lambda x: not isinstance(x, int), warping_indices))):
raise ValueError("Kumar warping indices should be integer")
if Xmin is None and Xmax is None:
Xmin = X.min(axis=0)
Xmax = X.max(axis=0)
else:
if Xmin is None or Xmax is None:
raise ValueError("Xmin and Xmax need to be provide at the same time!")
if len(Xmin) != X.shape[1] or len(Xmax) != X.shape[1]:
raise ValueError("Xmin and Xmax should have n_feature values!")
if epsilon is None:
epsilon = 1e-6
self.epsilon = epsilon
self.Xmin = Xmin - self.epsilon
self.Xmax = Xmax + self.epsilon
self.scaling = 1.0 / (self.Xmax - self.Xmin)
self.X_normalized = (X - self.Xmin) / (self.Xmax - self.Xmin)
if warping_indices is None:
warping_indices = range(X.shape[1])
self.warping_indices = warping_indices
self.warping_dim = len(self.warping_indices)
self.num_parameters = 2 * self.warping_dim
# create parameters
self.params = [[Param('a%d' % i, 1.0), Param('b%d' % i, 1.0)] for i in range(self.warping_dim)]
# add constraints
for i in range(self.warping_dim):
self.params[i][0].constrain_bounded(0.0, 10.0)
self.params[i][1].constrain_bounded(0.0, 10.0)
# set priors and add them into handler
for i in range(self.warping_dim):
self.params[i][0].set_prior(LogGaussian(0.0, 0.75))
self.params[i][1].set_prior(LogGaussian(0.0, 0.75))
self.link_parameter(self.params[i][0])
self.link_parameter(self.params[i][1])
def f(self, X, test_data=False):
"""Apply warping_function to some Input data
Parameters:
-----------
X : array_like, shape = (n_samples, n_features)
test_data: bool, optional
Defaults to False; should be set to True when transforming test data
Returns
-------
X_warped : array_like, shape = (n_samples, n_features)
The warped input data
Math
----
f(x) = 1 - (1 - x^a)^b
"""
X_warped = X.copy()
if test_data:
X_normalized = (X - self.Xmin) / (self.Xmax - self.Xmin)
else:
X_normalized = self.X_normalized
for i_seq, i_fea in enumerate(self.warping_indices):
a, b = self.params[i_seq][0], self.params[i_seq][1]
X_warped[:, i_fea] = 1 - np.power(1 - np.power(X_normalized[:, i_fea], a), b)
return X_warped
def fgrad_X(self, X):
"""Compute the gradient of warping function with respect to X
Parameters
----------
X : array_like, shape = (n_samples, n_features)
The location to compute gradient
Returns
-------
grad : array_like, shape = (n_samples, n_features)
The gradient for every location at X
Math
----
grad = a * b * x ^(a-1) * (1 - x^a)^(b-1)
"""
grad = np.zeros(X.shape)
for i_seq, i_fea in enumerate(self.warping_indices):
a, b = self.params[i_seq][0], self.params[i_seq][1]
grad[:, i_fea] = a * b * np.power(self.X_normalized[:, i_fea], a-1) * \
np.power(1 - np.power(self.X_normalized[:, i_fea], a), b-1) * self.scaling[i_fea]
return grad
def update_grads(self, X, dL_dW):
"""Update the gradients of marginal log likelihood with respect to the parameters of warping function
Parameters
----------
X : array_like, shape = (n_samples, n_features)
The input BEFORE warping
dL_dW : array_like, shape = (n_samples, n_features)
The gradient of marginal log likelihood with respect to the Warped input
Math
----
let w = f(x), the input after warping, then
dW_da = b * (1 - x^a)^(b - 1) * x^a * ln(x)
dW_db = - (1 - x^a)^b * ln(1 - x^a)
dL_da = dL_dW * dW_da
dL_db = dL_dW * dW_db
"""
for i_seq, i_fea in enumerate(self.warping_indices):
ai, bi = self.params[i_seq][0], self.params[i_seq][1]
# cache some values to save computation
x_pow_a = np.power(self.X_normalized[:, i_fea], ai)
# compute gradient for ai, bi on all X
dz_dai = bi * np.power(1 - x_pow_a, bi-1) * x_pow_a * np.log(self.X_normalized[:, i_fea])
dz_dbi = - np.power(1 - x_pow_a, bi) * np.log(1 - x_pow_a)
# sum gradients on all the data
dL_dai = np.sum(dL_dW[:, i_fea] * dz_dai)
dL_dbi = np.sum(dL_dW[:, i_fea] * dz_dbi)
self.params[i_seq][0].gradient[:] = dL_dai
self.params[i_seq][1].gradient[:] = dL_dbi
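The analytic gradients above can be sanity-checked numerically. A small self-contained sketch in plain NumPy (the values of a and b are illustrative) comparing dW_da and dW_db from the docstring against central finite differences:
import numpy as np

def kumar(x, a, b):
    # Kumar warping on already-normalized inputs: f(x) = 1 - (1 - x^a)^b
    return 1.0 - np.power(1.0 - np.power(x, a), b)

x = np.linspace(0.05, 0.95, 10)
a, b, eps = 1.3, 0.7, 1e-6

dW_da = b * np.power(1 - x**a, b - 1) * x**a * np.log(x)
dW_db = -np.power(1 - x**a, b) * np.log(1 - x**a)

num_da = (kumar(x, a + eps, b) - kumar(x, a - eps, b)) / (2 * eps)
num_db = (kumar(x, a, b + eps) - kumar(x, a, b - eps)) / (2 * eps)

np.testing.assert_allclose(dW_da, num_da, rtol=1e-4)
np.testing.assert_allclose(dW_db, num_db, rtol=1e-4)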

View file

@ -33,6 +33,27 @@ class _Norm(object):
""" """
raise NotImplementedError raise NotImplementedError
def to_dict(self):
raise NotImplementedError
def _to_dict(self):
input_dict = {}
return input_dict
@staticmethod
def from_dict(input_dict):
import copy
input_dict = copy.deepcopy(input_dict)
normalizer_class = input_dict.pop('class')
import GPy
normalizer_class = eval(normalizer_class)
return normalizer_class._from_dict(normalizer_class, input_dict)
@staticmethod
def _from_dict(normalizer_class, input_dict):
return normalizer_class(**input_dict)
class Standardize(_Norm):
def __init__(self):
self.mean = None
@ -50,9 +71,26 @@ class Standardize(_Norm):
def scaled(self):
return self.mean is not None
def to_dict(self):
input_dict = super(Standardize, self)._to_dict()
input_dict["class"] = "GPy.util.normalizer.Standardize"
if self.mean is not None:
input_dict["mean"] = self.mean.tolist()
input_dict["std"] = self.std.tolist()
return input_dict
@staticmethod
def _from_dict(kernel_class, input_dict):
s = Standardize()
if "mean" in input_dict:
s.mean = np.array(input_dict["mean"])
if "std" in input_dict:
s.std = np.array(input_dict["std"])
return s
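A minimal round-trip sketch for these serialization hooks, using only the scale_by, to_dict and from_dict calls exercised by the serialization tests earlier in this diff:
import numpy as np
import GPy

norm = GPy.util.normalizer.Standardize()
norm.scale_by(np.random.rand(10))                         # fit mean/std to some data
restored = GPy.util.normalizer._Norm.from_dict(norm.to_dict())
assert np.allclose(norm.mean, restored.mean)
assert np.allclose(norm.std, restored.std)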
# Inverse variance to be implemented, disabling for now
# If someone in the future want to implement this,
# we need to implement the inverse variance for
# normalization. This means, we need to know the factor
# for the variance to be multiplied to the variance in
# normalized space. This is easy to compute for standardization
@ -71,7 +109,7 @@ class Standardize(_Norm):
# def inverse_mean(self, X):
# return (X + .5) * (self.ymax - self.ymin) + self.ymin
# def inverse_variance(self, var):
#
# return (var*(self.std**2))
# def scaled(self):
# return (self.ymin is not None) and (self.ymax is not None)

GPy/util/quad_integrate.py (new file, 119 lines)
View file

@ -0,0 +1,119 @@
"""
Utilities related to integration by quadrature methods.
Contains an implementation of Gauss-Kronrod integration.
"""
import numpy as np
def getSubs(Subs, XK, NK=1):
M = (Subs[1, :] - Subs[0, :]) / 2
C = (Subs[1, :] + Subs[0, :]) / 2
I = XK[:, None] * M + np.ones((NK, 1)) * C
# A = [Subs(1,:); I]
A = np.vstack((Subs[0, :], I))
# B = [I;Subs(2,:)]
B = np.vstack((I, Subs[1, :]))
# Subs = [reshape(A, 1, []);
A = A.flatten()
# reshape(B, 1, [])];
B = B.flatten()
Subs = np.vstack((A,B))
# Subs = np.concatenate((A, B), axis=0)
return Subs
def quadvgk(feval, fmin, fmax, tol1=1e-5, tol2=1e-5):
"""
numpy implementation based on the code here: http://se.mathworks.com/matlabcentral/fileexchange/18801-quadvgk
We use Gauss-Kronrod integration, as already used in GPstuff, for evaluating one-dimensional integrals.
This is vectorised quadrature, meaning that several functions can be evaluated at the same time over a grid of points.
:param feval: vectorised integrand, returning one row per function to integrate
:param fmin: lower integration limit
:param fmax: upper integration limit
:param tol1: tolerance on the difference between the Kronrod and Gauss estimates (note: reset to 1e-4 inside the function)
:param tol2: tolerance on the width of a subinterval (note: reset to 1e-4 inside the function)
:return: array of integral values, one entry per function
"""
XK = np.array([-0.991455371120813, -0.949107912342759, -0.864864423359769, -0.741531185599394,
-0.586087235467691, -0.405845151377397, -0.207784955007898, 0.,
0.207784955007898, 0.405845151377397, 0.586087235467691,
0.741531185599394, 0.864864423359769, 0.949107912342759, 0.991455371120813])
WK = np.array([0.022935322010529, 0.063092092629979, 0.104790010322250, 0.140653259715525,
0.169004726639267, 0.190350578064785, 0.204432940075298, 0.209482141084728,
0.204432940075298, 0.190350578064785, 0.169004726639267,
0.140653259715525, 0.104790010322250, 0.063092092629979, 0.022935322010529])
# 7-point Gaussian weightings
WG = np.array([0.129484966168870, 0.279705391489277, 0.381830050505119, 0.417959183673469,
0.381830050505119, 0.279705391489277, 0.129484966168870])
NK = WK.size
G = np.arange(2,NK,2)
tol1 = 1e-4
tol2 = 1e-4
Subs = np.array([[fmin],[fmax]])
# number of functions to evaluate in the feval vector of functions.
NF = feval(np.zeros(1)).size
Q = np.zeros(NF)
neval = 0
while Subs.size > 0:
Subs = getSubs(Subs,XK)
M = (Subs[1,:] - Subs[0,:]) / 2
C = (Subs[1,:] + Subs[0,:]) / 2
# NM = length(M);
NM = M.size
# x = reshape(XK * M + ones(NK, 1) * C, 1, []);
x = XK[:,None]*M + C
x = x.flatten()
FV = feval(x)
# FV = FV[:,None]
Q1 = np.zeros((NF, NM))
Q2 = np.zeros((NF, NM))
# for n=1:NF
# F = reshape(FV(n,:), NK, []);
# Q1(n,:) = M. * sum((WK * ones(1, NM)). * F);
# Q2(n,:) = M. * sum((WG * ones(1, NM)). * F(G,:));
# end
# for i in range(NF):
# F = FV
# F = F.reshape((NK,-1))
# temp_mat = np.sum(np.multiply(WK[:,None]*np.ones((1,NM)), F),axis=0)
# Q1[i,:] = np.multiply(M, temp_mat)
# temp_mat = np.sum(np.multiply(WG[:,None]*np.ones((1, NM)), F[G-1,:]), axis=0)
# Q2[i,:] = np.multiply(M, temp_mat)
# ind = np.where(np.logical_or(np.max(np.abs(Q1 -Q2) / Q1) < tol1, (Subs[1,:] - Subs[0,:]) <= tol2) > 0)[0]
# Q = Q + np.sum(Q1[:,ind], axis=1)
# np.delete(Subs, ind,axis=1)
Q1 = np.dot(FV.reshape(NF, NK, NM).swapaxes(2,1),WK)*M
Q2 = np.dot(FV.reshape(NF, NK, NM).swapaxes(2,1)[:,:,1::2],WG)*M
#ind = np.nonzero(np.logical_or(np.max(np.abs((Q1-Q2)/Q1), 0) < difftol , M < xtol))[0]
ind = np.nonzero(np.logical_or(np.max(np.abs((Q1-Q2)), 0) < tol1 , (Subs[1,:] - Subs[0,:]) < tol2))[0]
Q = Q + np.sum(Q1[:,ind], axis=1)
Subs = np.delete(Subs, ind, axis=1)
return Q
def quadgk_int(f, fmin=-np.inf, fmax=np.inf, difftol=0.1):
"""
Integrate f from fmin to fmax by the substitution
x = r / (1 - r**2):
as r goes from -1 to 1, x goes from -inf to inf.
The interval for the quadvgk function is (-1, 1), so we transform the domain from (-inf, inf) to (-1, 1).
:param f: vectorised integrand, returning one row per function to integrate
:param fmin: lower limit (the substitution assumes the default, -np.inf; the argument is currently unused)
:param fmax: upper limit (the substitution assumes the default, np.inf; the argument is currently unused)
:param difftol: quadrature tolerance (currently reset to 1e-4 inside the function)
:return: array of integral values, one entry per function
"""
difftol = 1e-4
def trans_func(r):
r2 = np.square(r)
x = r / (1-r2)
dx_dr = (1 + r2)/(1-r2)**2
return f(x)*dx_dr
integrand = quadvgk(trans_func, -1., 1., difftol, difftol)
return integrand
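A hedged usage sketch of quadgk_int, mirroring the quadrature tests earlier in this diff: the integrand returns one row per function, here the 0th, 1st and 2nd moments of the unnormalised standard Gaussian, whose exact integrals are sqrt(2*pi), 0 and sqrt(2*pi):
import numpy as np
from GPy.util.quad_integrate import quadgk_int

def integrand(x):
    # rows: exp(-x^2/2) * x^k for k = 0, 1, 2
    return np.exp(-0.5 * x ** 2) * np.power(x, np.arange(3)[:, None])

vals = quadgk_int(integrand)
expected = np.array([np.sqrt(2 * np.pi), 0.0, np.sqrt(2 * np.pi)])
np.testing.assert_allclose(vals, expected, atol=1e-4)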

View file

@ -3,7 +3,7 @@ environment:
secure: 8/ZjXFwtd1S7ixd7PJOpptupKKEDhm2da/q3unabJ00=
COVERALLS_REPO_TOKEN:
secure: d3Luic/ESkGaWnZrvWZTKrzO+xaVwJWaRCEP0F+K/9DQGPSRZsJ/Du5g3s4XF+tS
- gpy_version: 1.7.7
+ gpy_version: 1.8.0
matrix:
- PYTHON_VERSION: 2.7
MINICONDA: C:\Miniconda-x64

View file

@ -1,5 +1,5 @@
[bumpversion]
- current_version = 1.7.7
+ current_version = 1.8.0
tag = True
commit = True