Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-15 06:52:39 +02:00)

Commit d40db2b9af: 47 changed files with 14431 additions and 273 deletions
CHANGELOG.md (11416 additions): file diff suppressed because it is too large.
@@ -1 +1 @@
__version__ = "1.7.7"
__version__ = "1.8.0"
@@ -109,6 +109,68 @@ class GP(Model):
        self.link_parameter(self.likelihood)
        self.posterior = None

    def to_dict(self, save_data=True):
        input_dict = super(GP, self)._to_dict()
        input_dict["class"] = "GPy.core.GP"
        if not save_data:
            input_dict["X"] = None
            input_dict["Y"] = None
        else:
            try:
                input_dict["X"] = self.X.values.tolist()
            except:
                input_dict["X"] = self.X.tolist()
            try:
                input_dict["Y"] = self.Y.values.tolist()
            except:
                input_dict["Y"] = self.Y.tolist()
        input_dict["kernel"] = self.kern.to_dict()
        input_dict["likelihood"] = self.likelihood.to_dict()
        if self.mean_function is not None:
            input_dict["mean_function"] = self.mean_function.to_dict()
        input_dict["inference_method"] = self.inference_method.to_dict()
        # FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class.
        if self.Y_metadata is not None:
            input_dict["Y_metadata"] = self.Y_metadata
        if self.normalizer is not None:
            input_dict["normalizer"] = self.normalizer.to_dict()
        return input_dict

    @staticmethod
    def _from_dict(input_dict, data=None):
        import GPy
        import numpy as np
        if (input_dict['X'] is None) or (input_dict['Y'] is None):
            assert(data is not None)
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
        elif data is not None:
            print("WARNING: The model has been saved with X, Y! The original values are being overridden!")
            input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1])
        else:
            input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y'])
        input_dict["kernel"] = GPy.kern.Kern.from_dict(input_dict["kernel"])
        input_dict["likelihood"] = GPy.likelihoods.likelihood.Likelihood.from_dict(input_dict["likelihood"])
        mean_function = input_dict.get("mean_function")
        if mean_function is not None:
            input_dict["mean_function"] = GPy.core.mapping.Mapping.from_dict(mean_function)
        else:
            input_dict["mean_function"] = mean_function
        input_dict["inference_method"] = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(input_dict["inference_method"])

        # FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class.
        Y_metadata = input_dict.get("Y_metadata")
        input_dict["Y_metadata"] = Y_metadata

        normalizer = input_dict.get("normalizer")
        if normalizer is not None:
            input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer)
        else:
            input_dict["normalizer"] = normalizer
        return GP(**input_dict)

    def save_model(self, output_filename, compress=True, save_data=True):
        self._save_model(output_filename, compress=compress, save_data=save_data)

    # The predictive variable to be used to predict using the posterior object's
    # woodbury_vector and woodbury_inv is defined as predictive_variable
    # as long as the posterior has the right woodbury entries.
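The round-trip added above can be exercised end to end. A minimal sketch with illustrative toy data; it assumes the concrete model class exposes to_dict the way the GP base class here does (true for the GPy 1.8.0 model classes touched by this commit, such as GPRegression):

import numpy as np
import GPy

X = np.random.uniform(0., 10., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1)*0.05
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))

m.save_model("my_gp", compress=True, save_data=True)  # writes my_gp.zip
m2 = GPy.core.model.Model.load_model("my_gp.zip")     # rebuilds the model via from_dict
assert np.allclose(m.predict(X)[0], m2.predict(X)[0])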
@@ -616,4 +678,3 @@ class GP(Model):
        """
        mu_star, var_star = self._raw_predict(x_test)
        return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
@@ -25,6 +25,30 @@ class Mapping(Parameterized):
    def update_gradients(self, dL_dF, X):
        raise NotImplementedError

    def to_dict(self):
        raise NotImplementedError

    def _to_dict(self):
        input_dict = {}
        input_dict["input_dim"] = self.input_dim
        input_dict["output_dim"] = self.output_dim
        input_dict["name"] = self.name
        return input_dict

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        mapping_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        mapping_class = eval(mapping_class)
        return mapping_class._from_dict(mapping_class, input_dict)

    @staticmethod
    def _from_dict(mapping_class, input_dict):
        return mapping_class(**input_dict)


class Bijective_mapping(Mapping):
    """
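Every from_dict in this commit follows the same dispatch idiom: to_dict stores a fully qualified class name under the "class" key; the base class's from_dict pops that key, resolves it with eval, and hands the remaining entries to the subclass constructor through _from_dict. A self-contained sketch of the idiom (Base and Leaf are illustrative names, not GPy classes):

import copy

class Base(object):
    def to_dict(self):
        # A real subclass records its own fully qualified class name here.
        return {"class": "Leaf", "name": self.name}

    @staticmethod
    def from_dict(input_dict):
        input_dict = copy.deepcopy(input_dict)   # don't mutate the caller's dict
        cls = eval(input_dict.pop("class"))      # resolve the stored name, e.g. "Leaf" -> Leaf
        return cls(**input_dict)                 # remaining keys feed the constructor

class Leaf(Base):
    def __init__(self, name):
        self.name = name

restored = Base.from_dict(Leaf("example").to_dict())
assert isinstance(restored, Leaf) and restored.name == "example"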
@@ -37,5 +61,3 @@ class Bijective_mapping(Mapping):
    def g(self, f):
        """Inverse mapping from the output domain of the function to the inputs."""
        raise NotImplementedError
@@ -8,6 +8,61 @@ class Model(ParamzModel, Priorizable):
    def __init__(self, name):
        super(Model, self).__init__(name)  # Parameterized.__init__(self)

    def _to_dict(self):
        input_dict = {}
        input_dict["name"] = self.name
        return input_dict

    def to_dict(self):
        raise NotImplementedError

    @staticmethod
    def from_dict(input_dict, data=None):
        import copy
        input_dict = copy.deepcopy(input_dict)
        model_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        model_class = eval(model_class)
        return model_class._from_dict(input_dict, data)

    @staticmethod
    def _from_dict(model_class, input_dict, data=None):
        return model_class(**input_dict)

    def save_model(self, output_filename, compress=True, save_data=True):
        raise NotImplementedError

    def _save_model(self, output_filename, compress=True, save_data=True):
        import json
        output_dict = self.to_dict(save_data)
        if compress:
            import gzip
            with gzip.GzipFile(output_filename + ".zip", 'w') as outfile:
                json_str = json.dumps(output_dict)
                json_bytes = json_str.encode('utf-8')
                outfile.write(json_bytes)
        else:
            with open(output_filename + ".json", 'w') as outfile:
                json.dump(output_dict, outfile)

    @staticmethod
    def load_model(output_filename, data=None):
        compress = output_filename.split(".")[-1] == "zip"
        import json
        if compress:
            import gzip
            with gzip.GzipFile(output_filename, 'r') as json_data:
                json_bytes = json_data.read()
                json_str = json_bytes.decode('utf-8')
                output_dict = json.loads(json_str)
        else:
            with open(output_filename) as json_data:
                output_dict = json.load(json_data)
        import GPy
        return GPy.core.model.Model.from_dict(output_dict, data)

    def log_likelihood(self):
        raise NotImplementedError("this needs to be implemented to use the model class")
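A note on the on-disk format implied by _save_model/load_model above: with compress=True the file is gzip-wrapped JSON (despite the .zip suffix it is a gzip stream, not a zip archive), so a saved model can be inspected without GPy. A small sketch, assuming a model was previously saved as my_gp.zip:

import gzip, json

with gzip.GzipFile("my_gp.zip", 'r') as f:
    d = json.loads(f.read().decode('utf-8'))
print(d["class"])        # the fully qualified model class recorded by to_dict
print(sorted(d.keys()))  # X, Y, kernel, likelihood, inference_method, ...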
@@ -41,6 +41,26 @@ class LatentFunctionInference(object):
        """
        pass

    def _to_dict(self):
        input_dict = {}
        return input_dict

    def to_dict(self):
        raise NotImplementedError

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        inference_class = input_dict.pop('class')
        import GPy
        inference_class = eval(inference_class)
        return inference_class._from_dict(inference_class, input_dict)

    @staticmethod
    def _from_dict(inference_class, input_dict):
        return inference_class(**input_dict)

class InferenceMethodList(LatentFunctionInference, list):

    def on_optimization_start(self):
@@ -21,6 +21,11 @@ class ExactGaussianInference(LatentFunctionInference):
    def __init__(self):
        pass  # self._YYTfactor_cache = caching.cache()

    def to_dict(self):
        input_dict = super(ExactGaussianInference, self)._to_dict()
        input_dict["class"] = "GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference"
        return input_dict

    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, variance=None, Z_tilde=None):
        """
        Returns a Posterior class containing essential quantities of the posterior
@@ -6,11 +6,141 @@ from paramz import ObsAr
from . import ExactGaussianInference, VarDTC
from ...util import diag
from .posterior import PosteriorEP as Posterior
from ...likelihoods import Gaussian
from . import LatentFunctionInference

log_2_pi = np.log(2*np.pi)


# Four wrapper classes to help modularisation of different EP versions
class marginalMoments(object):
    def __init__(self, num_data):
        self.Z_hat = np.empty(num_data, dtype=np.float64)
        self.mu_hat = np.empty(num_data, dtype=np.float64)
        self.sigma2_hat = np.empty(num_data, dtype=np.float64)


class cavityParams(object):
    def __init__(self, num_data):
        self.tau = np.empty(num_data, dtype=np.float64)
        self.v = np.empty(num_data, dtype=np.float64)
    def _update_i(self, eta, ga_approx, post_params, i):
        self.tau[i] = 1./post_params.Sigma_diag[i] - eta*ga_approx.tau[i]
        self.v[i] = post_params.mu[i]/post_params.Sigma_diag[i] - eta*ga_approx.v[i]
    def to_dict(self):
        return {"tau": self.tau.tolist(), "v": self.v.tolist()}
    @staticmethod
    def from_dict(input_dict):
        c = cavityParams(len(input_dict["tau"]))
        c.tau = np.array(input_dict["tau"])
        c.v = np.array(input_dict["v"])
        return c


class gaussianApproximation(object):
    def __init__(self, v, tau):
        self.tau = tau
        self.v = v
    def _update_i(self, eta, delta, post_params, marg_moments, i):
        # Site parameters update
        delta_tau = delta/eta*(1./marg_moments.sigma2_hat[i] - 1./post_params.Sigma_diag[i])
        delta_v = delta/eta*(marg_moments.mu_hat[i]/marg_moments.sigma2_hat[i] - post_params.mu[i]/post_params.Sigma_diag[i])
        tau_tilde_prev = self.tau[i]
        self.tau[i] += delta_tau

        # Enforce positivity of tau_tilde. Even though this is guaranteed for log-concave sites, it is still possible
        # to get negative values due to numerical errors. Moreover, the value of tau_tilde should be positive in order to
        # update the marginal likelihood without running into instability issues.
        if self.tau[i] < np.finfo(float).eps:
            self.tau[i] = np.finfo(float).eps
            delta_tau = self.tau[i] - tau_tilde_prev
        self.v[i] += delta_v

        return (delta_tau, delta_v)
    def to_dict(self):
        return {"tau": self.tau.tolist(), "v": self.v.tolist()}
    @staticmethod
    def from_dict(input_dict):
        return gaussianApproximation(np.array(input_dict["v"]), np.array(input_dict["tau"]))


class posteriorParamsBase(object):
    def __init__(self, mu, Sigma_diag):
        self.mu = mu
        self.Sigma_diag = Sigma_diag
    def _update_rank1(self, *arg):
        pass

    def _recompute(self, *arg):
        pass

class posteriorParams(posteriorParamsBase):
    def __init__(self, mu, Sigma, L=None):
        self.Sigma = Sigma
        self.L = L
        Sigma_diag = np.diag(self.Sigma)
        super(posteriorParams, self).__init__(mu, Sigma_diag)

    def _update_rank1(self, delta_tau, ga_approx, i):
        ci = delta_tau/(1. + delta_tau*self.Sigma_diag[i])
        DSYR(self.Sigma, self.Sigma[:,i].copy(), -ci)
        self.mu = np.dot(self.Sigma, ga_approx.v)
    def to_dict(self):
        # TODO: Implement a more memory-efficient variant
        if self.L is None:
            return {"mu": self.mu.tolist(), "Sigma": self.Sigma.tolist()}
        else:
            return {"mu": self.mu.tolist(), "Sigma": self.Sigma.tolist(), "L": self.L.tolist()}
    @staticmethod
    def from_dict(input_dict):
        if "L" in input_dict:
            return posteriorParams(np.array(input_dict["mu"]), np.array(input_dict["Sigma"]), np.array(input_dict["L"]))
        else:
            return posteriorParams(np.array(input_dict["mu"]), np.array(input_dict["Sigma"]))

    @staticmethod
    def _recompute(K, ga_approx):
        num_data = len(ga_approx.tau)
        tau_tilde_root = np.sqrt(ga_approx.tau)
        Sroot_tilde_K = tau_tilde_root[:,None] * K
        B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
        L = jitchol(B)
        V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
        Sigma = K - np.dot(V.T, V)  # K - K S^(1/2) B^(-1) S^(1/2) K = (K^(-1) + \tilde{Sigma}^(-1))^(-1)
        mu = np.dot(Sigma, ga_approx.v)
        return posteriorParams(mu=mu, Sigma=Sigma, L=L)

class posteriorParamsDTC(posteriorParamsBase):
    def __init__(self, mu, Sigma_diag):
        super(posteriorParamsDTC, self).__init__(mu, Sigma_diag)

    def _update_rank1(self, LLT, Kmn, delta_v, delta_tau, i):
        #DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
        DSYR(LLT, Kmn[:,i].copy(), delta_tau)
        L = jitchol(LLT)
        V, info = dtrtrs(L, Kmn, lower=1)
        self.Sigma_diag = np.maximum(np.sum(V*V, -2), np.finfo(float).eps)  # diag(K_nm (L L^\top)^(-1) K_mn)
        si = np.sum(V.T*V[:,i], -1)  # (V V^\top)[:,i]
        self.mu += (delta_v - delta_tau*self.mu[i])*si
        # mu = np.dot(Sigma, v_tilde)

    @staticmethod
    def _recompute(LLT0, Kmn, ga_approx):
        LLT = LLT0 + np.dot(Kmn*ga_approx.tau[None,:], Kmn.T)
        L = jitchol(LLT)
        V, _ = dtrtrs(L, Kmn, lower=1)
        #Sigma_diag = np.sum(V*V,-2)
        #Knmv_tilde = np.dot(Kmn,v_tilde)
        #mu = np.dot(V2.T,Knmv_tilde)
        Sigma = np.dot(V.T, V)
        mu = np.dot(Sigma, ga_approx.v)
        Sigma_diag = np.diag(Sigma).copy()
        return posteriorParamsDTC(mu, Sigma_diag), LLT

class EPBase(object):
    def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False, max_iters=np.inf, ep_mode="alternated", parallel_updates=False):
    def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False, max_iters=np.inf, ep_mode="alternated", parallel_updates=False, loading=False):
        """
        The expectation-propagation algorithm.
        For nomenclature see Rasmussen & Williams 2006.
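In equations, cavityParams._update_i and gaussianApproximation._update_i above implement the standard EP cavity and fractional site updates in natural parameters (cf. Rasmussen & Williams 2006, ch. 3.6), with \sigma_i^2 = Sigma_diag[i], \mu_i the current posterior mean, and (\hat\mu_i, \hat\sigma_i^2) the matched moments:

\tau_{-i} = \sigma_i^{-2} - \eta\,\tilde\tau_i, \qquad v_{-i} = \mu_i\,\sigma_i^{-2} - \eta\,\tilde v_i

\Delta\tilde\tau_i = \frac{\delta}{\eta}\big(\hat\sigma_i^{-2} - \sigma_i^{-2}\big), \qquad \Delta\tilde v_i = \frac{\delta}{\eta}\big(\hat\mu_i\,\hat\sigma_i^{-2} - \mu_i\,\sigma_i^{-2}\big)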
@@ -26,16 +156,20 @@ class EPBase(object):
        :max_iters: int
        :ep_mode: string. It can be "nested" (EP is run every time the hyperparameters change) or "alternated" (EP is run once at the beginning and then the hyperparameters are optimized).
        :parallel_updates: boolean. If true, the parameters of the sites are updated in parallel.
        :loading: boolean. If True, prevents the EP parameters from changing. Hack used when loading a serialized model.
        """
        super(EPBase, self).__init__()

        self.always_reset = always_reset
        self.epsilon, self.eta, self.delta, self.max_iters = epsilon, eta, delta, max_iters
        self.ep_mode = ep_mode
        self.parallel_updates = parallel_updates
        # FIXME: Hack for serialization. If True, prevents the EP parameters from changing when loading a serialized model.
        self.loading = loading
        self.reset()

    def reset(self):
        self.old_mutilde, self.old_vtilde = None, None
        self.ga_approx_old = None
        self._ep_approximation = None

    def on_optimization_start(self):
@@ -45,6 +179,11 @@ class EPBase(object):
        # TODO: update approximation in the end as well? Maybe even with a switch?
        pass

    def _stop_criteria(self, ga_approx):
        tau_diff = np.mean(np.square(ga_approx.tau - self.ga_approx_old.tau))
        v_diff = np.mean(np.square(ga_approx.v - self.ga_approx_old.v))
        return ((tau_diff < self.epsilon) and (v_diff < self.epsilon))

    def __setstate__(self, state):
        super(EPBase, self).__setstate__(state[0])
        self.epsilon, self.eta, self.delta = state[1]
@@ -53,9 +192,21 @@ class EPBase(object):
    def __getstate__(self):
        return [super(EPBase, self).__getstate__(), [self.epsilon, self.eta, self.delta]]

    def _to_dict(self):
        input_dict = super(EPBase, self)._to_dict()
        input_dict["epsilon"] = self.epsilon
        input_dict["eta"] = self.eta
        input_dict["delta"] = self.delta
        input_dict["always_reset"] = self.always_reset
        input_dict["max_iters"] = self.max_iters
        input_dict["ep_mode"] = self.ep_mode
        input_dict["parallel_updates"] = self.parallel_updates
        input_dict["loading"] = True
        return input_dict

class EP(EPBase, ExactGaussianInference):
    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
        if self.always_reset:
        if self.always_reset and not self.loading:
            self.reset()

        num_data, output_dim = Y.shape
@@ -64,22 +215,22 @@ class EP(EPBase, ExactGaussianInference):
        if K is None:
            K = kern.K(X)

        if self.ep_mode=="nested":
        if self.ep_mode=="nested" and not self.loading:
            #Force EP at each step of the optimization
            self._ep_approximation = None
            mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
        elif self.ep_mode=="alternated":
            post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
        elif self.ep_mode=="alternated" or self.loading:
            if getattr(self, '_ep_approximation', None) is None:
                #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
                mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
                post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
            else:
                #if we've already run EP, just use the existing approximation stored in self._ep_approximation
                mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation
                post_params, ga_approx, cav_params, log_Z_tilde = self._ep_approximation
        else:
            raise ValueError("ep_mode value not valid")

        v_tilde = mu_tilde * tau_tilde
        return self._inference(K, tau_tilde, v_tilde, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde.sum())
        self.loading = False
        return self._inference(Y, K, ga_approx, cav_params, likelihood, Y_metadata=Y_metadata, Z_tilde=log_Z_tilde)

    def expectation_propagation(self, K, Y, likelihood, Y_metadata):
@@ -90,41 +241,57 @@ class EP(EPBase, ExactGaussianInference):
        # than ObsArrays
        Y = Y.values.copy()

        #Initial values - Marginal moments
        Z_hat = np.empty(num_data, dtype=np.float64)
        mu_hat = np.empty(num_data, dtype=np.float64)
        sigma2_hat = np.empty(num_data, dtype=np.float64)
        #Initial values - Marginal moments, cavity params, Gaussian approximation params and posterior params
        marg_moments = marginalMoments(num_data)
        cav_params = cavityParams(num_data)
        ga_approx, post_params = self._init_approximations(K, num_data)

        tau_cav = np.empty(num_data, dtype=np.float64)
        v_cav = np.empty(num_data, dtype=np.float64)
        #Approximation
        stop = False
        iterations = 0
        while not stop and (iterations < self.max_iters):
            self._local_updates(num_data, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata)

            #(re)compute Sigma and mu using a full Cholesky decomposition
            post_params = posteriorParams._recompute(K, ga_approx)

            #monitor convergence
            if iterations > 0:
                stop = self._stop_criteria(ga_approx)
            self.ga_approx_old = gaussianApproximation(ga_approx.v.copy(), ga_approx.tau.copy())
            iterations += 1

        # Z_tilde after removing the terms that can lead to infinite terms due to tau_tilde close to zero.
        # These terms cancel with the corresponding terms in the marginal log-likelihood.
        log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)
        # - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)
        return (post_params, ga_approx, cav_params, log_Z_tilde)

    def _init_approximations(self, K, num_data):
        #initial values - Gaussian factors
        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        if self.old_mutilde is None:
            tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
        if self.ga_approx_old is None:
            v_tilde, tau_tilde = np.zeros((2, num_data))
            ga_approx = gaussianApproximation(v_tilde, tau_tilde)
            Sigma = K.copy()
            diag.add(Sigma, 1e-7)
            mu = np.zeros(num_data)
            post_params = posteriorParams(mu, Sigma)
        else:
            assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
            mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
            tau_tilde = v_tilde/mu_tilde
            mu, Sigma, _ = self._ep_compute_posterior(K, tau_tilde, v_tilde)
            diag.add(Sigma, 1e-7)
            assert self.ga_approx_old.v.size == num_data, "data size mis-match: did you change the data? try resetting!"
            ga_approx = gaussianApproximation(self.ga_approx_old.v, self.ga_approx_old.tau)
            post_params = posteriorParams._recompute(K, ga_approx)
            diag.add(post_params.Sigma, 1e-7)
        # TODO: Check the log-marginal under both conditions and choose the best one
        return (ga_approx, post_params)

        #Approximation
        tau_diff = self.epsilon + 1.
        v_diff = self.epsilon + 1.
        tau_tilde_old = np.nan
        v_tilde_old = np.nan
        iterations = 0
        while ((tau_diff > self.epsilon) or (v_diff > self.epsilon)) and (iterations < self.max_iters):
    def _local_updates(self, num_data, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata, update_order=None):
        if update_order is None:
            update_order = np.random.permutation(num_data)
        for i in update_order:
            #Cavity distribution parameters
            tau_cav[i] = 1./Sigma[i,i] - self.eta*tau_tilde[i]
            v_cav[i] = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
            cav_params._update_i(self.eta, ga_approx, post_params, i)

            if Y_metadata is not None:
                # Pick out the relevant metadata for Yi
                Y_metadata_i = {}
@@ -133,93 +300,77 @@ class EP(EPBase, ExactGaussianInference):
            else:
                Y_metadata_i = None
            #Marginal moments
            Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)
            #Site parameters update
            delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
            delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
            tau_tilde_prev = tau_tilde[i]
            tau_tilde[i] += delta_tau
            marg_moments.Z_hat[i], marg_moments.mu_hat[i], marg_moments.sigma2_hat[i] = likelihood.moments_match_ep(Y[i], cav_params.tau[i], cav_params.v[i], Y_metadata_i=Y_metadata_i)

            # Enforce positivity of tau_tilde. Even though this is guaranteed for log-concave sites, it is still possible
            # to get negative values due to numerical errors. Moreover, the value of tau_tilde should be positive in order to
            # update the marginal likelihood without instability issues.
            if tau_tilde[i] < np.finfo(float).eps:
                tau_tilde[i] = np.finfo(float).eps
                delta_tau = tau_tilde[i] - tau_tilde_prev
            v_tilde[i] += delta_v
            #Site parameters update
            delta_tau, delta_v = ga_approx._update_i(self.eta, self.delta, post_params, marg_moments, i)

            if self.parallel_updates == False:
                #Posterior distribution parameters update
                ci = delta_tau/(1. + delta_tau*Sigma[i,i])
                DSYR(Sigma, Sigma[:,i].copy(), -ci)
                mu = np.dot(Sigma, v_tilde)
                post_params._update_rank1(delta_tau, ga_approx, i)

        #(re)compute Sigma and mu using a full Cholesky decomposition
        mu, Sigma, _ = self._ep_compute_posterior(K, tau_tilde, v_tilde)
    def _log_Z_tilde(self, marg_moments, ga_approx, cav_params):
        return np.sum((np.log(marg_moments.Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(1+ga_approx.tau/cav_params.tau) - 0.5 * ((ga_approx.v)**2 * 1./(cav_params.tau + ga_approx.tau))
                       + 0.5*(cav_params.v * ( ( (ga_approx.tau/cav_params.tau) * cav_params.v - 2.0 * ga_approx.v ) * 1./(cav_params.tau + ga_approx.tau)))))

        #monitor convergence
        if iterations > 0:
            tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
            v_diff = np.mean(np.square(v_tilde-v_tilde_old))
        tau_tilde_old = tau_tilde.copy()
        v_tilde_old = v_tilde.copy()

        iterations += 1

        mu_tilde = v_tilde/tau_tilde
        mu_cav = v_cav/tau_cav
        sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde

        # Z_tilde after removing the terms that can lead to infinite terms due to tau_tilde close to zero.
        # These terms cancel with the corresponding terms in the marginal log-likelihood.
        log_Z_tilde = (np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(1+tau_tilde/tau_cav)
                       - 0.5 * ((v_tilde)**2 * 1./(tau_cav + tau_tilde)) + 0.5*(v_cav * ( ( (tau_tilde/tau_cav) * v_cav - 2.0 * v_tilde ) * 1./(tau_cav + tau_tilde))))
        # - 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)

        self.old_mutilde = mu_tilde
        self.old_vtilde = v_tilde

        return mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde

    def _ep_compute_posterior(self, K, tau_tilde, v_tilde):
        num_data = len(tau_tilde)
        tau_tilde_root = np.sqrt(tau_tilde)
        Sroot_tilde_K = tau_tilde_root[:,None] * K
        B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
        L = jitchol(B)
        V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
        Sigma = K - np.dot(V.T, V)  # K - K S^(1/2) B^(-1) S^(1/2) K = (K^(-1) + \tilde{Sigma}^(-1))^(-1)
        mu = np.dot(Sigma, v_tilde)
        return (mu, Sigma, L)

    def _ep_marginal(self, K, tau_tilde, v_tilde, Z_tilde):
        mu, Sigma, L = self._ep_compute_posterior(K, tau_tilde, v_tilde)
    def _ep_marginal(self, K, ga_approx, Z_tilde):
        post_params = posteriorParams._recompute(K, ga_approx)

        # Gaussian log marginal excluding terms that can go to infinity due to arbitrarily small tau_tilde.
        # These terms cancel out with the terms excluded from Z_tilde.
        B_logdet = np.sum(2.0*np.log(np.diag(L)))
        log_marginal = 0.5*(-len(tau_tilde) * log_2_pi - B_logdet + np.sum(v_tilde * np.dot(Sigma, v_tilde)))
        B_logdet = np.sum(2.0*np.log(np.diag(post_params.L)))
        log_marginal = 0.5*(-len(ga_approx.tau) * log_2_pi - B_logdet + np.sum(ga_approx.v * np.dot(post_params.Sigma, ga_approx.v)))
        log_marginal += Z_tilde

        return log_marginal, mu, Sigma, L
        return log_marginal, post_params

    def _inference(self, K, tau_tilde, v_tilde, likelihood, Z_tilde, Y_metadata=None):
        log_marginal, mu, Sigma, L = self._ep_marginal(K, tau_tilde, v_tilde, Z_tilde)
    def _inference(self, Y, K, ga_approx, cav_params, likelihood, Z_tilde, Y_metadata=None):
        log_marginal, post_params = self._ep_marginal(K, ga_approx, Z_tilde)

        tau_tilde_root = np.sqrt(tau_tilde)
        tau_tilde_root = np.sqrt(ga_approx.tau)
        Sroot_tilde_K = tau_tilde_root[:,None] * K

        aux_alpha, _ = dpotrs(L, np.dot(Sroot_tilde_K, v_tilde), lower=1)
        alpha = (v_tilde - tau_tilde_root * aux_alpha)[:,None]  # (K + \tilde{Sigma})^(-1) \tilde{mu}
        LWi, _ = dtrtrs(L, np.diag(tau_tilde_root), lower=1)
        aux_alpha, _ = dpotrs(post_params.L, np.dot(Sroot_tilde_K, ga_approx.v), lower=1)
        alpha = (ga_approx.v - tau_tilde_root * aux_alpha)[:,None]  # (K + \tilde{Sigma})^(-1) \tilde{mu}
        LWi, _ = dtrtrs(post_params.L, np.diag(tau_tilde_root), lower=1)
        Wi = np.dot(LWi.T, LWi)
        symmetrify(Wi)  # (K + \tilde{Sigma})^(-1)

        dL_dK = 0.5 * (tdot(alpha) - Wi)
        dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)

        dL_dthetaL = likelihood.ep_gradients(Y, cav_params.tau, cav_params.v, np.diag(dL_dK), Y_metadata=Y_metadata, quad_mode='gh')
        return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK': dL_dK, 'dL_dthetaL': dL_dthetaL, 'dL_dm': alpha}

    def to_dict(self):
        input_dict = super(EP, self)._to_dict()
        input_dict["class"] = "GPy.inference.latent_function_inference.expectation_propagation.EP"
        if self.ga_approx_old is not None:
            input_dict["ga_approx_old"] = self.ga_approx_old.to_dict()
        if self._ep_approximation is not None:
            input_dict["_ep_approximation"] = {}
            input_dict["_ep_approximation"]["post_params"] = self._ep_approximation[0].to_dict()
            input_dict["_ep_approximation"]["ga_approx"] = self._ep_approximation[1].to_dict()
            input_dict["_ep_approximation"]["cav_params"] = self._ep_approximation[2].to_dict()
            input_dict["_ep_approximation"]["log_Z_tilde"] = self._ep_approximation[3].tolist()

        return input_dict

    @staticmethod
    def _from_dict(inference_class, input_dict):
        ga_approx_old = input_dict.pop('ga_approx_old', None)
        if ga_approx_old is not None:
            ga_approx_old = gaussianApproximation.from_dict(ga_approx_old)
        _ep_approximation_dict = input_dict.pop('_ep_approximation', None)
        _ep_approximation = []
        if _ep_approximation_dict is not None:
            _ep_approximation.append(posteriorParams.from_dict(_ep_approximation_dict["post_params"]))
            _ep_approximation.append(gaussianApproximation.from_dict(_ep_approximation_dict["ga_approx"]))
            _ep_approximation.append(cavityParams.from_dict(_ep_approximation_dict["cav_params"]))
            _ep_approximation.append(np.array(_ep_approximation_dict["log_Z_tilde"]))
        ee = EP(**input_dict)
        ee.ga_approx_old = ga_approx_old
        ee._ep_approximation = _ep_approximation
        return ee

class EPDTC(EPBase, VarDTC):
    def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
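Written out, the _log_Z_tilde sum added to the EP class above is

\log\tilde{Z} = \sum_i \Big[ \log\hat{Z}_i + \tfrac{1}{2}\log 2\pi + \tfrac{1}{2}\log\big(1 + \tilde\tau_i/\tau_{-i}\big) - \frac{\tilde v_i^2}{2(\tau_{-i} + \tilde\tau_i)} + \frac{v_{-i}\big((\tilde\tau_i/\tau_{-i})\,v_{-i} - 2\tilde v_i\big)}{2(\tau_{-i} + \tilde\tau_i)} \Big]

with site parameters (\tilde v, \tilde\tau) from ga_approx and cavity parameters (v_{-}, \tau_{-}) from cav_params; the terms that diverge as \tilde\tau_i \to 0 are omitted because, as the comments note, they cancel against the corresponding terms of the marginal log-likelihood in _ep_marginal.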
@@ -244,24 +395,25 @@ class EPDTC(EPBase, VarDTC):
        if self.ep_mode=="nested":
            #Force EP at each step of the optimization
            self._ep_approximation = None
            mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
            post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
        elif self.ep_mode=="alternated":
            if getattr(self, '_ep_approximation', None) is None:
                #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
                mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
                post_params, ga_approx, log_Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
            else:
                #if we've already run EP, just use the existing approximation stored in self._ep_approximation
                mu, Sigma_diag, mu_tilde, tau_tilde, log_Z_tilde = self._ep_approximation
                post_params, ga_approx, log_Z_tilde = self._ep_approximation
        else:
            raise ValueError("ep_mode value not valid")

        return super(EPDTC, self).inference(kern, X, Z, likelihood, mu_tilde,
        mu_tilde = ga_approx.v / ga_approx.tau.astype(float)

        return super(EPDTC, self).inference(kern, X, Z, likelihood, ObsAr(mu_tilde[:,None]),
                                            mean_function=mean_function,
                                            Y_metadata=Y_metadata,
                                            precision=tau_tilde,
                                            precision=ga_approx.tau,
                                            Lm=Lm, dL_dKmm=dL_dKmm,
                                            psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=log_Z_tilde.sum())
                                            psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=log_Z_tilde)

    def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
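The conversion mu_tilde = ga_approx.v / ga_approx.tau.astype(float) in the new code is the usual natural-to-moment reparameterization of the Gaussian sites,

\tilde\mu_i = \tilde v_i / \tilde\tau_i, \qquad \tilde\sigma_i^2 = 1/\tilde\tau_i,

which is the form the parent VarDTC inference consumes: site means as pseudo-observations (the ObsAr argument) and site precisions via the precision keyword.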
@@ -272,51 +424,78 @@ class EPDTC(EPBase, VarDTC):
        # than ObsArrays
        Y = Y.values.copy()

        #Initial values - Marginal moments
        Z_hat = np.zeros(num_data, dtype=np.float64)
        mu_hat = np.zeros(num_data, dtype=np.float64)
        sigma2_hat = np.zeros(num_data, dtype=np.float64)
        #Initial values - Marginal moments, cavity params, Gaussian approximation params and posterior params
        marg_moments = marginalMoments(num_data)
        cav_params = cavityParams(num_data)
        ga_approx, post_params, LLT0, LLT = self._init_approximations(Kmm, Kmn, num_data)

        tau_cav = np.empty(num_data, dtype=np.float64)
        v_cav = np.empty(num_data, dtype=np.float64)
        #Approximation
        stop = False
        iterations = 0
        while not stop and (iterations < self.max_iters):
            self._local_updates(num_data, LLT0, LLT, Kmn, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata)
            #(re)compute Sigma, Sigma_diag and mu using a full Cholesky decomposition
            post_params, LLT = posteriorParamsDTC._recompute(LLT0, Kmn, ga_approx)
            post_params.Sigma_diag = np.maximum(post_params.Sigma_diag, np.finfo(float).eps)

            #monitor convergence
            if iterations > 0:
                stop = self._stop_criteria(ga_approx)
            self.ga_approx_old = gaussianApproximation(ga_approx.v.copy(), ga_approx.tau.copy())
            iterations += 1

        log_Z_tilde = self._log_Z_tilde(marg_moments, ga_approx, cav_params)

        return post_params, ga_approx, log_Z_tilde

    def _log_Z_tilde(self, marg_moments, ga_approx, cav_params):
        mu_tilde = ga_approx.v/ga_approx.tau
        mu_cav = cav_params.v/cav_params.tau
        sigma2_sigma2tilde = 1./cav_params.tau + 1./ga_approx.tau

        return np.sum((np.log(marg_moments.Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
                       + 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde)))

    def _init_approximations(self, Kmm, Kmn, num_data):
        #initial values - Gaussian factors
        #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
        LLT0 = Kmm.copy()
        Lm = jitchol(LLT0)  # K_m = L_m L_m^\top
        Vm,info = dtrtrs(Lm,Kmn,lower=1)
        Vm,info = dtrtrs(Lm, Kmn,lower=1)
        # Lmi = dtrtri(Lm)
        # Kmmi = np.dot(Lmi.T,Lmi)
        # KmmiKmn = np.dot(Kmmi,Kmn)
        # Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
        Qnn_diag = np.sum(Vm*Vm, -2)  # diag(Knm Kmm^(-1) Kmn)
        #diag.add(LLT0, 1e-8)
        if self.old_mutilde is None:
        if self.ga_approx_old is None:
            #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
            LLT = LLT0.copy()  # Sigma = K.copy()
            mu = np.zeros(num_data)
            Sigma_diag = Qnn_diag.copy() + 1e-8
            tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
            v_tilde, tau_tilde = np.zeros((2, num_data))
            ga_approx = gaussianApproximation(v_tilde, tau_tilde)
            post_params = posteriorParamsDTC(mu, Sigma_diag)

        else:
            assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
            mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
            tau_tilde = v_tilde/mu_tilde
            mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
            Sigma_diag += 1e-8
            assert self.ga_approx_old.v.size == num_data, "data size mis-match: did you change the data? try resetting!"
            ga_approx = gaussianApproximation(self.ga_approx_old.v, self.ga_approx_old.tau)
            post_params, LLT = posteriorParamsDTC._recompute(LLT0, Kmn, ga_approx)
            post_params.Sigma_diag += 1e-8

        # TODO: Check the log-marginal under both conditions and choose the best one

        #Approximation
        tau_diff = self.epsilon + 1.
        v_diff = self.epsilon + 1.
        tau_tilde_old = np.nan
        v_tilde_old = np.nan
        iterations = 0
        while ((tau_diff > self.epsilon) or (v_diff > self.epsilon)) and (iterations < self.max_iters):
        return (ga_approx, post_params, LLT0, LLT)

    def _local_updates(self, num_data, LLT0, LLT, Kmn, cav_params, post_params, marg_moments, ga_approx, likelihood, Y, Y_metadata, update_order=None):
        if update_order is None:
            update_order = np.random.permutation(num_data)
        for i in update_order:

            #Cavity distribution parameters
            tau_cav[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
            v_cav[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
            cav_params._update_i(self.eta, ga_approx, post_params, i)

            if Y_metadata is not None:
                # Pick out the relevant metadata for Yi
                Y_metadata_i = {}
@@ -326,65 +505,10 @@ class EPDTC(EPBase, VarDTC):
                Y_metadata_i = None

            #Marginal moments
            Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)
            marg_moments.Z_hat[i], marg_moments.mu_hat[i], marg_moments.sigma2_hat[i] = likelihood.moments_match_ep(Y[i], cav_params.tau[i], cav_params.v[i], Y_metadata_i=Y_metadata_i)
            #Site parameters update
            delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
            delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
            tau_tilde_prev = tau_tilde[i]
            tau_tilde[i] += delta_tau

            # Enforce positivity of tau_tilde. Even though this is guaranteed for log-concave sites, it is still possible
            # to get negative values due to numerical errors. Moreover, the value of tau_tilde should be positive in order to
            # update the marginal likelihood without instability issues.
            if tau_tilde[i] < np.finfo(float).eps:
                tau_tilde[i] = np.finfo(float).eps
                delta_tau = tau_tilde[i] - tau_tilde_prev
            v_tilde[i] += delta_v
            delta_tau, delta_v = ga_approx._update_i(self.eta, self.delta, post_params, marg_moments, i)

            #Posterior distribution parameters update
            if self.parallel_updates == False:
                #DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
                DSYR(LLT, Kmn[:,i].copy(), delta_tau)
                L = jitchol(LLT)
                V,info = dtrtrs(L, Kmn, lower=1)
                Sigma_diag = np.maximum(np.sum(V*V, -2), np.finfo(float).eps)  # diag(K_nm (L L^\top)^(-1) K_mn)
                si = np.sum(V.T*V[:,i], -1)  # (V V^\top)[:,i]
                mu += (delta_v - delta_tau*mu[i])*si
                #mu = np.dot(Sigma, v_tilde)

        #(re)compute Sigma, Sigma_diag and mu using a full Cholesky decomposition
        mu, Sigma_diag, LLT = self._ep_compute_posterior(LLT0, Kmn, tau_tilde, v_tilde)
        Sigma_diag = np.maximum(Sigma_diag, np.finfo(float).eps)

        #monitor convergence
        if iterations > 0:
            tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
            v_diff = np.mean(np.square(v_tilde-v_tilde_old))
        tau_tilde_old = tau_tilde.copy()
        v_tilde_old = v_tilde.copy()
        iterations += 1

        mu_tilde = v_tilde/tau_tilde
        mu_cav = v_cav/tau_cav
        sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde

        log_Z_tilde = (np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
                       + 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))

        self.old_mutilde = mu_tilde
        self.old_vtilde = v_tilde

        return mu, Sigma_diag, ObsAr(mu_tilde[:,None]), tau_tilde, log_Z_tilde

    def _ep_compute_posterior(self, LLT0, Kmn, tau_tilde, v_tilde):
        LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:], Kmn.T)
        L = jitchol(LLT)
        V, _ = dtrtrs(L, Kmn, lower=1)
        #Sigma_diag = np.sum(V*V,-2)
        #Knmv_tilde = np.dot(Kmn,v_tilde)
        #mu = np.dot(V2.T,Knmv_tilde)
        Sigma = np.dot(V.T, V)
        mu = np.dot(Sigma, v_tilde)
        Sigma_diag = np.diag(Sigma).copy()

        return (mu, Sigma_diag, LLT)
                post_params._update_rank1(LLT, Kmn, delta_v, delta_tau, i)
@@ -43,6 +43,11 @@ class Add(CombinationKernel):
        else:
            return False

    def to_dict(self):
        input_dict = super(Add, self)._to_dict()
        input_dict["class"] = str("GPy.kern.Add")
        return input_dict

    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def K(self, X, X2=None, which_parts=None):
        """
@@ -60,6 +60,35 @@ class Kern(Parameterized):
            from .psi_comp import PSICOMP_GH
            self.psicomp = PSICOMP_GH()

    def _to_dict(self):
        input_dict = {}
        input_dict["input_dim"] = self.input_dim
        if isinstance(self.active_dims, np.ndarray):
            input_dict["active_dims"] = self.active_dims.tolist()
        else:
            input_dict["active_dims"] = self.active_dims
        input_dict["name"] = self.name
        input_dict["useGPU"] = self.useGPU
        return input_dict

    def to_dict(self):
        raise NotImplementedError

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        kernel_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        import GPy
        kernel_class = eval(kernel_class)
        return kernel_class._from_dict(kernel_class, input_dict)

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        return kernel_class(**input_dict)


    def __setstate__(self, state):
        self._all_dims_active = np.arange(0, max(state['active_dims']) + 1)
        super(Kern, self).__setstate__(state)
@@ -342,6 +371,21 @@ class CombinationKernel(Kern):
        self.extra_dims = extra_dims
        self.link_parameters(*kernels)

    def _to_dict(self):
        input_dict = super(CombinationKernel, self)._to_dict()
        input_dict["parts"] = {}
        for ii in range(len(self.parts)):
            input_dict["parts"][ii] = self.parts[ii].to_dict()
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        parts = input_dict.pop('parts', None)
        subkerns = []
        for pp in parts:
            subkerns.append(Kern.from_dict(parts[pp]))
        return kernel_class(subkerns)

    @property
    def parts(self):
        return self.parameters
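The kernel side of the serialization can be exercised on its own. A minimal sketch with illustrative hyperparameter values, assuming GPy 1.8.0 where these methods exist:

import numpy as np
import GPy

k = GPy.kern.RBF(2, variance=1.5, lengthscale=0.7) + GPy.kern.Bias(2)
d = k.to_dict()                  # {"class": "GPy.kern.Add", "parts": {...}, ...}
k2 = GPy.kern.Kern.from_dict(d)  # dispatches on the "class" entry, rebuilding each part

X = np.random.randn(5, 2)
assert np.allclose(k.K(X), k2.K(X))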
@@ -51,6 +51,18 @@ class Linear(Kern):
        self.link_parameter(self.variances)
        self.psicomp = PSICOMP_Linear()

    def to_dict(self):
        input_dict = super(Linear, self)._to_dict()
        input_dict["class"] = "GPy.kern.Linear"
        input_dict["variances"] = self.variances.values.tolist()
        input_dict["ARD"] = self.ARD
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Linear(**input_dict)

    @Cache_this(limit=3)
    def K(self, X, X2=None):
        if self.ARD:

@@ -211,5 +223,3 @@ class LinearFull(Kern):
    def gradients_X_diag(self, dL_dKdiag, X):
        P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
        return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)
@@ -400,4 +400,3 @@ class PeriodicMatern52(Periodic):
        self.variance.gradient = np.sum(dK_dvar*dL_dK)
        self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
        self.period.gradient = np.sum(dK_dper*dL_dK)
@@ -39,6 +39,11 @@ class Prod(CombinationKernel):
            kernels.insert(i, part)
        super(Prod, self).__init__(kernels, name)

    def to_dict(self):
        input_dict = super(Prod, self)._to_dict()
        input_dict["class"] = str("GPy.kern.Prod")
        return input_dict

    @Cache_this(limit=3, force_kwargs=['which_parts'])
    def K(self, X, X2=None, which_parts=None):
        if which_parts is None:
@@ -31,6 +31,14 @@ class RBF(Stationary):
            self.inv_l = Param('inv_lengthscale', 1./self.lengthscale**2, Logexp())
            self.link_parameter(self.inv_l)

    def to_dict(self):
        input_dict = super(RBF, self)._to_dict()
        input_dict["class"] = "GPy.kern.RBF"
        input_dict["inv_l"] = self.use_invLengthscale
        if input_dict["inv_l"] == True:
            input_dict["lengthscale"] = np.sqrt(1 / float(self.inv_l))
        return input_dict

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)
@@ -93,6 +93,17 @@ class StdPeriodic(Kern):

        self.link_parameters(self.variance, self.period, self.lengthscale)

    def to_dict(self):
        input_dict = super(StdPeriodic, self)._to_dict()
        input_dict["class"] = "GPy.kern.StdPeriodic"
        input_dict["variance"] = self.variance.values.tolist()
        input_dict["period"] = self.period.values.tolist()
        input_dict["lengthscale"] = self.lengthscale.values.tolist()
        input_dict["ARD1"] = self.ARD1
        input_dict["ARD2"] = self.ARD2
        return input_dict


    def parameters_changed(self):
        """
        This function acts as a callback for each optimization iteration.
@@ -14,6 +14,11 @@ class Static(Kern):
        self.variance = Param('variance', variance, Logexp())
        self.link_parameters(self.variance)

    def _to_dict(self):
        input_dict = super(Static, self)._to_dict()
        input_dict["variance"] = self.variance.values.tolist()
        return input_dict

    def Kdiag(self, X):
        ret = np.empty((X.shape[0],), dtype=np.float64)
        ret[:] = self.variance

@@ -133,6 +138,16 @@ class Bias(Static):
    def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
        super(Bias, self).__init__(input_dim, variance, active_dims, name)

    def to_dict(self):
        input_dict = super(Bias, self)._to_dict()
        input_dict["class"] = "GPy.kern.Bias"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Bias(**input_dict)

    def K(self, X, X2=None):
        shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
        return np.full(shape, self.variance, dtype=np.float64)

@@ -250,4 +265,3 @@ class Precomputed(Fixed):

    def update_gradients_diag(self, dL_dKdiag, X):
        self.variance.gradient = np.einsum('i,ii', dL_dKdiag, self._index(X, None))
@@ -79,6 +79,13 @@ class Stationary(Kern):
        assert self.variance.size == 1
        self.link_parameters(self.variance, self.lengthscale)

    def _to_dict(self):
        input_dict = super(Stationary, self)._to_dict()
        input_dict["variance"] = self.variance.values.tolist()
        input_dict["lengthscale"] = self.lengthscale.values.tolist()
        input_dict["ARD"] = self.ARD
        return input_dict

    def K_of_r(self, r):
        raise NotImplementedError("implement the covariance function as a fn of r to use this class")

@@ -351,6 +358,16 @@ class Exponential(Stationary):
    def dK_dr(self, r):
        return -self.K_of_r(r)

    def to_dict(self):
        input_dict = super(Exponential, self)._to_dict()
        input_dict["class"] = "GPy.kern.Exponential"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Exponential(**input_dict)

    # def sde(self):
    #     """
    #     Return the state space representation of the covariance.

@@ -399,6 +416,16 @@ class Matern32(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'):
        super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
        input_dict = super(Matern32, self)._to_dict()
        input_dict["class"] = "GPy.kern.Matern32"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Matern32(**input_dict)

    def K_of_r(self, r):
        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)

@@ -478,6 +505,16 @@ class Matern52(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
        input_dict = super(Matern52, self)._to_dict()
        input_dict["class"] = "GPy.kern.Matern52"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return Matern52(**input_dict)

    def K_of_r(self, r):
        return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)

@@ -533,6 +570,16 @@ class ExpQuad(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
        super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def to_dict(self):
        input_dict = super(ExpQuad, self)._to_dict()
        input_dict["class"] = "GPy.kern.ExpQuad"
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return ExpQuad(**input_dict)

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)

@@ -566,6 +613,17 @@ class RatQuad(Stationary):
        self.power = Param('power', power, Logexp())
        self.link_parameters(self.power)

    def to_dict(self):
        input_dict = super(RatQuad, self)._to_dict()
        input_dict["class"] = "GPy.kern.RatQuad"
        input_dict["power"] = self.power.values.tolist()
        return input_dict

    @staticmethod
    def _from_dict(kernel_class, input_dict):
        useGPU = input_dict.pop('useGPU', None)
        return RatQuad(**input_dict)

    def K_of_r(self, r):
        r2 = np.square(r)
        # return self.variance*np.power(1. + r2/2., -self.power)

@@ -588,5 +646,3 @@ class RatQuad(Stationary):
    def update_gradients_diag(self, dL_dKdiag, X):
        super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
        self.power.gradient = 0.
@@ -7,4 +7,5 @@ from .student_t import StudentT
from .likelihood import Likelihood
from .mixed_noise import MixedNoise
from .binomial import Binomial

from .weibull import Weibull
from .loglogistic import LogLogistic
@@ -29,6 +29,11 @@ class Bernoulli(Likelihood):
        if isinstance(gp_link, (link_functions.Heaviside, link_functions.Probit)):
            self.log_concave = True

    def to_dict(self):
        input_dict = super(Bernoulli, self)._to_dict()
        input_dict["class"] = "GPy.likelihoods.Bernoulli"
        return input_dict

    def _preprocess_values(self, Y):
        """
        Check if the values of the observations correspond to the values
@@ -66,7 +66,14 @@ class Binomial(Likelihood):
        np.testing.assert_array_equal(N.shape, y.shape)

        nchoosey = special.gammaln(N+1) - special.gammaln(y+1) - special.gammaln(N-y+1)
        return nchoosey + y*np.log(inv_link_f) + (N-y)*np.log(1.-inv_link_f)

        Ny = N-y
        t1 = np.zeros(y.shape)
        t2 = np.zeros(y.shape)
        t1[y>0] = y[y>0]*np.log(inv_link_f[y>0])
        t2[Ny>0] = Ny[Ny>0]*np.log(1.-inv_link_f[Ny>0])

        return nchoosey + t1 + t2

    def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
        """

@@ -86,7 +93,13 @@ class Binomial(Likelihood):
        N = Y_metadata['trials']
        np.testing.assert_array_equal(N.shape, y.shape)

        return y/inv_link_f - (N-y)/(1.-inv_link_f)
        Ny = N-y
        t1 = np.zeros(y.shape)
        t2 = np.zeros(y.shape)
        t1[y>0] = y[y>0]/inv_link_f[y>0]
        t2[Ny>0] = (Ny[Ny>0])/(1.-inv_link_f[Ny>0])

        return t1 - t2

    def d2logpdf_dlink2(self, inv_link_f, y, Y_metadata=None):
        """

@@ -111,7 +124,13 @@ class Binomial(Likelihood):
        """
        N = Y_metadata['trials']
        np.testing.assert_array_equal(N.shape, y.shape)
        return -y/np.square(inv_link_f) - (N-y)/np.square(1.-inv_link_f)
        Ny = N-y
        t1 = np.zeros(y.shape)
        t2 = np.zeros(y.shape)
        t1[y>0] = -y[y>0]/np.square(inv_link_f[y>0])
        t2[Ny>0] = -(Ny[Ny>0])/np.square(1.-inv_link_f[Ny>0])
        return t1+t2


    def d3logpdf_dlink3(self, inv_link_f, y, Y_metadata=None):
        """

@@ -135,8 +154,14 @@ class Binomial(Likelihood):
        N = Y_metadata['trials']
        np.testing.assert_array_equal(N.shape, y.shape)

        inv_link_f2 = np.square(inv_link_f)
        return 2*y/inv_link_f**3 - 2*(N-y)/(1.-inv_link_f)**3
        #inv_link_f2 = np.square(inv_link_f) #TODO Remove. Why is this here?

        Ny = N-y
        t1 = np.zeros(y.shape)
        t2 = np.zeros(y.shape)
        t1[y>0] = 2*y[y>0]/inv_link_f[y>0]**3
        t2[Ny>0] = -2*(Ny[Ny>0])/(1.-inv_link_f[Ny>0])**3
        return t1 + t2

    def samples(self, gp, Y_metadata=None, **kw):
        """
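The four rewritten Binomial derivatives above all apply the same numerical guard: terms with coefficient y or N-y are evaluated only where that coefficient is nonzero, so expressions like 0*log(0) or 0/0 at the boundaries of the inverse link can no longer produce NaNs. A standalone sketch of the idiom:

import numpy as np

def safe_y_log_p(y, p):
    # y*log(p) evaluated only where y > 0, so 0*log(0) never appears.
    out = np.zeros(y.shape)
    m = y > 0
    out[m] = y[m] * np.log(p[m])
    return out

y = np.array([0., 3., 5.])
p = np.array([0., 0.5, 1.])   # inverse-link values may hit the boundary exactly
print(safe_y_log_p(y, p))     # [ 0. -2.0794  0. ], no NaN at y == 0, p == 0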
@@ -46,6 +46,13 @@ class Gaussian(Likelihood):
        if isinstance(gp_link, link_functions.Identity):
            self.log_concave = True

    def to_dict(self):
        input_dict = super(Gaussian, self)._to_dict()
        input_dict["class"] = "GPy.likelihoods.Gaussian"
        input_dict["variance"] = self.variance.values.tolist()
        return input_dict


    def betaY(self, Y, Y_metadata=None):
        # TODO: ~Ricardo this does not live here
        raise RuntimeError("Please notify the GPy developers, this should not happen")
@@ -57,7 +64,10 @@ class Gaussian(Likelihood):
    def update_gradients(self, grad):
        self.variance.gradient = grad

    def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None):
    def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
        return self.exact_inference_gradients(dL_dKdiag)

    def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None):
        return dL_dKdiag.sum()

    def _preprocess_values(self, Y):
@ -6,8 +6,12 @@ from scipy import stats,special
|
|||
import scipy as sp
|
||||
from . import link_functions
|
||||
from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
|
||||
from ..util.quad_integrate import quadgk_int
|
||||
from scipy.integrate import quad
|
||||
from functools import partial
|
||||
|
||||
import warnings
|
||||
|
||||
from ..core.parameterization import Parameterized
|
||||
|
||||
class Likelihood(Parameterized):
|
||||
|
|
@ -40,6 +44,37 @@ class Likelihood(Parameterized):
        self.gp_link = gp_link
        self.log_concave = False
        self.not_block_really = False
        self.name = name

    def to_dict(self):
        raise NotImplementedError

    def _to_dict(self):
        input_dict = {}
        input_dict["name"] = self.name
        input_dict["gp_link_dict"] = self.gp_link.to_dict()
        return input_dict

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        likelihood_class = input_dict.pop('class')
        input_dict["name"] = str(input_dict["name"])
        name = input_dict.pop('name')
        import GPy
        likelihood_class = eval(likelihood_class)
        return likelihood_class._from_dict(likelihood_class, input_dict)

    @staticmethod
    def _from_dict(likelihood_class, input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        gp_link_dict = input_dict.pop('gp_link_dict')
        import GPy
        gp_link = GPy.likelihoods.link_functions.GPTransformation.from_dict(gp_link_dict)
        input_dict["gp_link"] = gp_link
        return likelihood_class(**input_dict)
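# A minimal round-trip sketch of the serialization API above (an illustrative aside;
# assumes GPy is importable and uses the Gaussian likelihood, whose to_dict is added
# earlier in this diff):
import GPy
lik = GPy.likelihoods.Gaussian(variance=0.1)
d = lik.to_dict()   # {'class': 'GPy.likelihoods.Gaussian', 'name': ..., 'gp_link_dict': ..., 'variance': [...]}
lik2 = GPy.likelihoods.likelihood.Likelihood.from_dict(d)   # dispatches on d['class']
assert isinstance(lik2, GPy.likelihoods.Gaussian)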
    def request_num_latent_functions(self, Y):
        """
@ -223,6 +258,91 @@ class Likelihood(Parameterized):
            self.__gh_points = np.polynomial.hermite.hermgauss(T)
        return self.__gh_points

    def ep_gradients(self, Y, cav_tau, cav_v, dL_dKdiag, Y_metadata=None, quad_mode='gk', boost_grad=1.):
        if self.size > 0:
            shape = Y.shape
            tau, v, Y = cav_tau.flatten(), cav_v.flatten(), Y.flatten()
            mu = v/tau
            sigma2 = 1./tau

            # assert Y.shape == v.shape
            dlik_dtheta = np.empty((self.size, Y.shape[0]))
            # for j in range(self.size):
            Y_metadata_list = []
            for index in range(len(Y)):
                Y_metadata_i = {}
                if Y_metadata is not None:
                    for key in Y_metadata.keys():
                        Y_metadata_i[key] = Y_metadata[key][index,:]
                Y_metadata_list.append(Y_metadata_i)

            if quad_mode == 'gk':
                f = partial(self.integrate_gk)
                quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten()), Y_metadata_list))
                quads = np.vstack(quads)
                quads.reshape(self.size, shape[0], shape[1])
            elif quad_mode == 'gh':
                f = partial(self.integrate_gh)
                quads = zip(*map(f, Y.flatten(), mu.flatten(), np.sqrt(sigma2.flatten())))
                quads = np.hstack(quads)
                quads = quads.T
            else:
                raise Exception("no other quadrature mode available")
            # do a Gauss-Hermite integration
            dL_dtheta_avg = boost_grad * np.nanmean(quads, axis=1)
            dL_dtheta = boost_grad * np.nansum(quads, axis=1)
            # dL_dtheta = boost_grad * np.nansum(dlik_dtheta, axis=1)
        else:
            dL_dtheta = np.zeros(self.num_params)
        return dL_dtheta
    def integrate_gk(self, Y, mu, sigma, Y_metadata_i=None):
        # Gauss-Kronrod integration.
        fmin = -np.inf
        fmax = np.inf
        SQRT_2PI = np.sqrt(2.*np.pi)
        def generate_integral(f):
            a = np.exp(self.logpdf_link(f, Y, Y_metadata_i)) * np.exp(-0.5 * np.square((f - mu) / sigma)) / (SQRT_2PI * sigma)
            fn1 = a * self.dlogpdf_dtheta(f, Y, Y_metadata_i)
            fn = fn1
            return fn

        dF_dtheta_i = quadgk_int(generate_integral, fmin=fmin, fmax=fmax)
        return dF_dtheta_i
    def integrate_gh(self, Y, mu, sigma, Y_metadata_i=None, gh_points=None):
        # Gauss-Hermite quadrature.
        # Calculate the site derivatives E_f{d logp(y_i|f_i)/da}, where a is a likelihood
        # parameter and the expectation is over the exact marginal posterior, which is not
        # Gaussian: it is the unnormalised product of the cavity distribution (a Gaussian)
        # and the exact likelihood term.
        #
        # Here we calculate the expectation wrt the approximate marginal posterior, which
        # should be approximately the same. This term is needed for evaluating the
        # gradients of the marginal likelihood estimate Z_EP wrt the likelihood parameters.

        SQRT_2PI = np.sqrt(2.*np.pi)
        if gh_points is None:
            gh_x, gh_w = self._gh_points(32)
        else:
            gh_x, gh_w = gh_points

        X = gh_x[None,:]*np.sqrt(2.)*sigma + mu

        # Here X is a grid vector of candidate f_i values, while Y is a single value which is broadcast.
        a = np.exp(self.logpdf_link(X, Y, Y_metadata_i))
        a = a.repeat(self.num_params,0)
        b = self.dlogpdf_dtheta(X, Y, Y_metadata_i)
        old_shape = b.shape
        fn = np.array([i*j for i,j in zip(a.flatten(), b.flatten())])
        fn = fn.reshape(old_shape)

        dF_dtheta_i = np.dot(fn, gh_w)/np.sqrt(np.pi)
        return dF_dtheta_i
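# A standalone sketch of the Gauss-Hermite change of variables used above: to approximate
# E_{N(f; mu, sigma^2)}[g(f)], substitute f = sqrt(2)*sigma*x + mu, giving
# (1/sqrt(pi)) * sum_k w_k g(sqrt(2)*sigma*x_k + mu).
import numpy as np
gh_x, gh_w = np.polynomial.hermite.hermgauss(32)
mu, sigma = 0.3, 0.8
g = lambda f: f**2                                # E[f^2] = mu^2 + sigma^2
approx = np.dot(g(np.sqrt(2.)*sigma*gh_x + mu), gh_w)/np.sqrt(np.pi)
assert np.isclose(approx, mu**2 + sigma**2)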
    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
        """
        Use Gauss-Hermite quadrature to compute
@ -43,6 +43,25 @@ class GPTransformation(object):
        """
        raise NotImplementedError

    def to_dict(self):
        raise NotImplementedError

    def _to_dict(self):
        return {}

    @staticmethod
    def from_dict(input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        link_class = input_dict.pop('class')
        import GPy
        link_class = eval(link_class)
        return link_class._from_dict(link_class, input_dict)

    @staticmethod
    def _from_dict(link_class, input_dict):
        return link_class(**input_dict)
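# A minimal round-trip sketch for the link-function serialization above (an illustrative
# aside; assumes GPy is importable — Identity.to_dict is added just below in this diff):
import GPy
link = GPy.likelihoods.link_functions.Identity()
d = link.to_dict()   # {'class': 'GPy.likelihoods.link_functions.Identity'}
link2 = GPy.likelihoods.link_functions.GPTransformation.from_dict(d)
assert isinstance(link2, GPy.likelihoods.link_functions.Identity)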
class Identity(GPTransformation):
    """
    .. math::
@ -62,6 +81,10 @@ class Identity(GPTransformation):
    def d3transf_df3(self,f):
        return np.zeros_like(f)

    def to_dict(self):
        input_dict = super(Identity, self)._to_dict()
        input_dict["class"] = "GPy.likelihoods.link_functions.Identity"
        return input_dict

class Probit(GPTransformation):
    """
@ -82,6 +105,11 @@ class Probit(GPTransformation):
    def d3transf_df3(self,f):
        return (safe_square(f)-1.)*std_norm_pdf(f)

    def to_dict(self):
        input_dict = super(Probit, self)._to_dict()
        input_dict["class"] = "GPy.likelihoods.link_functions.Probit"
        return input_dict


class Cloglog(GPTransformation):
    """
304
GPy/likelihoods/loggaussian.py
Normal file
@ -0,0 +1,304 @@
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from scipy import stats, special
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from . import link_functions
from .likelihood import Likelihood


class LogGaussian(Likelihood):
    """
    Censored log-Gaussian likelihood.

    .. math::
        p(y_{i}|f_{i}, z_{i}) = \\left(\\frac{1}{y_{i}\\sqrt{2\\pi\\sigma^{2}}}\\exp\\left(-\\frac{(\\ln y_{i} - f_{i})^{2}}{2\\sigma^{2}}\\right)\\right)^{1-z_{i}}\\left(1 - \\Phi\\left(\\frac{\\ln y_{i} - f_{i}}{\\sigma}\\right)\\right)^{z_{i}}

    .. note:
        where z_{i} is the censoring indicator: 0 for non-censored data, 1 for censored data.
    """
    def __init__(self, gp_link=None, sigma=1.):
        if gp_link is None:
            gp_link = link_functions.Identity()
            # gp_link = link_functions.Log()

        super(LogGaussian, self).__init__(gp_link, name='loggaussian')

        self.sigma = Param('sigma', sigma, Logexp())
        self.variance = Param('variance', sigma**2, Logexp())
        self.link_parameter(self.variance)
        # self.link_parameter()
    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        return np.exp(self.logpdf_link(link_f, y, Y_metadata=Y_metadata))

    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        :param link_f: latent variables (link(f))
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: log likelihood evaluated for this point
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        uncensored = (1-c)*(-0.5*np.log(2*np.pi*self.variance) - np.log(y) - (np.log(y)-link_f)**2/(2*self.variance))
        censored = c*np.log(1 - stats.norm.cdf((np.log(y) - link_f)/np.sqrt(self.variance)))
        logpdf = uncensored + censored
        return logpdf
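# A quick standalone consistency check (an illustrative aside, not part of the file):
# for uncensored data the expression above is the log pdf of a lognormal with log-scale f
# and standard deviation sigma; for censored data it is the log survival function.
import numpy as np
from scipy import stats
f, s2, y = 0.4, 0.25, 1.7
uncensored = -0.5*np.log(2*np.pi*s2) - np.log(y) - (np.log(y) - f)**2/(2*s2)
assert np.isclose(uncensored, stats.lognorm.logpdf(y, np.sqrt(s2), scale=np.exp(f)))
censored = np.log(1 - stats.norm.cdf((np.log(y) - f)/np.sqrt(s2)))
assert np.isclose(censored, stats.lognorm.logsf(y, np.sqrt(s2), scale=np.exp(f)))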
    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Derivative of the log pdf at y, given link(f), w.r.t link(f)

        .. math::

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: gradient of the log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        val = np.log(y) - link_f
        val_scaled = val/np.sqrt(self.variance)
        val_scaled2 = val/self.variance
        uncensored = (1-c)*(val_scaled2)
        a = (1 - stats.norm.cdf(val_scaled))
        # llg(z) = 1./(1 - norm_cdf(r/sqrt(s2))) .* (1/sqrt(2*pi*s2) .* exp(-1/(2.*s2).*r.^2))
        censored = c*(1./a) * (np.exp(-1.*val**2/(2*self.variance)) / np.sqrt(2*np.pi*self.variance))
        # censored = c * (1./(1 - stats.norm.cdf(val_scaled))) * (stats.norm.pdf(val_scaled))
        gradient = uncensored + censored
        return gradient
    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
        The hessian will be 0 unless i == j

        .. math::

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
        :rtype: Nx1 array

        .. Note::
            Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i), not on link(f_{j!=i}))
        """
        # c = Y_metadata['censored']
        # c = np.zeros((y.shape[0],))
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        val = np.log(y) - link_f
        val_scaled = val/np.sqrt(self.variance)
        val_scaled2 = val/self.variance
        a = (1 - stats.norm.cdf(val_scaled))
        uncensored = (1-c)*(-1)/self.variance
        censored = c*(-np.exp(-val**2/self.variance)/(2*np.pi*self.variance*(a**2)) +
                      val*np.exp(-(val**2)/(2*self.variance))/(np.sqrt(2*np.pi)*self.variance**(3/2.)*a))
        hessian = censored + uncensored
        return hessian
    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative of the log-likelihood at y given link(f), w.r.t link(f)

        .. math::

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: third derivative of the log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        val = np.log(y) - link_f
        val_scaled = val/np.sqrt(self.variance)
        val_scaled2 = val/self.variance
        a = (1 - stats.norm.cdf(val_scaled))
        uncensored = 0
        censored = c*(2*np.exp(-3*(val**2)/(2*self.variance))/((a**3)*(2*np.pi*self.variance)**(3/2.))
                      - val*np.exp(-(val**2)/self.variance)/((a**2)*np.pi*self.variance**2)
                      - val*np.exp(-(val**2)/self.variance)/((a**2)*2*np.pi*self.variance**2)
                      - np.exp(-(val**2)/(2*self.variance))/(a*(self.variance**1.5)*np.sqrt(2*np.pi))
                      + (val**2)*np.exp(-(val**2)/(2*self.variance))/(a*np.sqrt(2*np.pi*self.variance)*self.variance**2))
        d3pdf_dlink3 = uncensored + censored
        return d3pdf_dlink3
    def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log-likelihood function at y given link(f), w.r.t the variance parameter

        .. math::

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the log likelihood evaluated at points link(f), w.r.t the variance parameter
        :rtype: Nx1 array
        """

        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        val = np.log(y) - link_f
        val_scaled = val/np.sqrt(self.variance)
        val_scaled2 = val/self.variance
        a = (1 - stats.norm.cdf(val_scaled))
        uncensored = (1-c)*(-0.5/self.variance + (val**2)/(2*(self.variance**2)))
        censored = c*(val*np.exp(-val**2/(2*self.variance))/(a*np.sqrt(2*np.pi)*2*(self.variance**1.5)))
        dlogpdf_dvar = uncensored + censored
        # dlogpdf_dvar = dlogpdf_dvar*self.variance
        return dlogpdf_dvar
    def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
        """
        Derivative of dlogpdf_dlink w.r.t the variance parameter

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the gradient of the log likelihood evaluated at points link(f), w.r.t the variance parameter
        :rtype: Nx1 array
        """
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        val = np.log(y) - link_f
        val_scaled = val/np.sqrt(self.variance)
        val_scaled2 = val/self.variance
        a = (1 - stats.norm.cdf(val_scaled))
        uncensored = (1-c)*(-val/(self.variance**2))
        censored = c*(-val*np.exp(-val**2/self.variance)/(4*np.pi*(self.variance**2)*(a**2)) +
                      (-1 + (val**2)/self.variance)*np.exp(-val**2/(2*self.variance)) /
                      (a*(np.sqrt(2.*np.pi)*2*self.variance**1.5)))
        dlik_grad_dsigma = uncensored + censored
        # dlik_grad_dsigma = dlik_grad_dsigma*self.variance
        return dlik_grad_dsigma
    def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
        """
        Derivative of the hessian (d2logpdf_dlink2) w.r.t the variance parameter

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the hessian of the log likelihood evaluated at points link(f), w.r.t the variance parameter
        :rtype: Nx1 array
        """
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']
        val = np.log(y) - link_f
        val_scaled = val/np.sqrt(self.variance)
        val_scaled2 = val/self.variance
        a = (1 - stats.norm.cdf(val_scaled))
        uncensored = (1-c)*(1./(self.variance**2))
        censored = c*(val*np.exp(-3*(val**2)/(2*self.variance))/((a**3)*np.sqrt(8*np.pi**3)*self.variance**(5/2.))
                      + np.exp(-val**2/self.variance)/((a**2)*4*np.pi*self.variance**2)
                      - np.exp(-val**2/self.variance)*val**2/((a**2)*2*np.pi*self.variance**3)
                      + np.exp(-val**2/self.variance)/((a**2)*4*np.pi*self.variance**2)
                      - np.exp(-val**2/(2*self.variance))*val/(a*np.sqrt(2*np.pi)*2*self.variance**(5/2.))
                      - np.exp(-val**2/self.variance)*(val**2)/((a**2)*4*np.pi*self.variance**3)
                      - np.exp(-val**2/(2*self.variance))*val/(a*np.sqrt(2*np.pi)*self.variance**(5/2.))
                      + np.exp(-val**2/(2*self.variance))*(val**3)/(a*np.sqrt(2*np.pi)*2*self.variance**(7/2.)))
        dlik_hess_dsigma = uncensored + censored
        return dlik_hess_dsigma
    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
        """
        :param f: latent variables link(f)
        :type f: NxD array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: stack of derivatives of the log likelihood w.r.t each linked parameter (here only the variance)
        :rtype: (num_params)xNxD array
        """
        dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
        dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f,y,Y_metadata=Y_metadata)
        return dlogpdf_dtheta

    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
        """
        :param f: latent variables link(f)
        :type f: NxD array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: stack of derivatives of dlogpdf_dlink w.r.t each linked parameter
        :rtype: (num_params)xNxD array
        """
        dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
        dlogpdf_dlink_dtheta[0,:,:] = self.dlogpdf_dlink_dvar(f,y,Y_metadata=Y_metadata)
        return dlogpdf_dlink_dtheta

    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
        """
        :param f: latent variables link(f)
        :type f: NxD array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: stack of derivatives of d2logpdf_dlink2 w.r.t each linked parameter
        :rtype: (num_params)xNxD array
        """
        d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
        d2logpdf_dlink2_dtheta[0,:,:] = self.d2logpdf_dlink2_dvar(f,y,Y_metadata=Y_metadata)
        return d2logpdf_dlink2_dtheta
    def update_gradients(self, grads):
        """
        Pull out the gradients; be careful as the order must match the order
        in which the parameters are added
        """
        self.variance.gradient = grads[0]

    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        :param gp: latent variable
        """
        orig_shape = gp.shape
        gp = gp.flatten()
339
GPy/likelihoods/loglogistic.py
Normal file
@ -0,0 +1,339 @@
from __future__ import division
# Copyright (c) 2015 Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from scipy import stats,special
import scipy as sp
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from . import link_functions
from .likelihood import Likelihood
from .link_functions import Log

class LogLogistic(Likelihood):
    """
    .. math::
        p(y_{i}|f_{i}, z_{i}) = \\prod_{i=1}^{n} \\left(\\frac{ry^{r-1}}{\\exp{f(x_{i})}}\\right)^{1-z_i} \\left(1 + \\left(\\frac{y}{\\exp(f(x_{i}))}\\right)^{r}\\right)^{z_i-2}

    .. note:
        where z_{i} is the censoring indicator: 0 for non-censored data, 1 for censored data.
    """

    def __init__(self, gp_link=None, r=1.0):
        if gp_link is None:
            # Parameterised not as link_f but as f
            gp_link = Log()

        super(LogLogistic, self).__init__(gp_link, name='LogLogistic')
        self.r = Param('r_log_shape', float(r), Logexp())
        self.link_parameter(self.r)
        # self.censored = 'censored'
    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

        .. math::

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        return np.exp(self.logpdf_link(link_f, y, Y_metadata=Y_metadata))


    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log likelihood function given link(f)

        .. math::

        :param link_f: latent variables (link(f))
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: log likelihood evaluated for this point
        :rtype: float
        """
        # c = np.zeros((y.shape[0],))
        c = np.zeros_like(link_f)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        link_f = np.clip(link_f, 1e-150, 1e100)
        # y_link_f = y/link_f
        # y_link_f_r = y_link_f**self.r
        # y_link_f_r = np.clip(y**self.r, 1e-150, 1e200) / np.clip(link_f**self.r, 1e-150, 1e200)
        # y_link_f_r = np.clip((y/link_f)**self.r, 1e-150, 1e200)
        y_r = np.clip(y**self.r, 1e-150, 1e200)
        link_f_r = np.clip(link_f**self.r, 1e-150, 1e200)
        y_link_f_r = np.clip(y_r / link_f_r, 1e-150, 1e200)
        #uncensored = (1-c)*(np.log(self.r) + (self.r+1)*np.log(y) - self.r*np.log(link_f) - 2*np.log1p(y_link_f_r))
        #uncensored = (1-c)*(np.log((self.r/link_f)*y_link_f**(self.r-1)) - 2*np.log1p(y_link_f_r))

        # split the log pdf into censored and uncensored parts
        uncensored = (1-c)*(np.log(self.r) + (self.r-1)*np.log(y) - self.r*np.log(link_f) - 2*np.log1p(y_link_f_r))
        censored = (c)*(-np.log1p(y_link_f_r))
        return uncensored + censored
        # return uncensored
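# A quick standalone consistency check (an illustrative aside, not part of the file):
# for uncensored data the expression above is the log pdf of scipy's fisk (log-logistic)
# distribution with shape r and scale link_f; for censored data it is the log survival
# function.
import numpy as np
from scipy import stats
r, link_f, y = 2.0, 1.5, 0.9
uncensored = np.log(r) + (r - 1)*np.log(y) - r*np.log(link_f) - 2*np.log1p((y/link_f)**r)
assert np.isclose(uncensored, stats.fisk.logpdf(y, r, scale=link_f))
censored = -np.log1p((y/link_f)**r)
assert np.isclose(censored, stats.fisk.logsf(y, r, scale=link_f))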
    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log likelihood function at y, given link(f), w.r.t link(f)

        .. math::

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: gradient of the log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
        # c = Y_metadata['censored']
        # for debugging
        # c = np.zeros((y.shape[0],))
        c = np.zeros_like(link_f)

        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        #y_link_f = y/link_f
        #y_link_f_r = y_link_f**self.r
        y_link_f_r = np.clip(y**self.r, 1e-150, 1e200) / np.clip(link_f**self.r, 1e-150, 1e200)

        #In terms of link_f
        # uncensored = (1-c)*( (2*self.r*y**r)/(link_f**self.r + y**self.r) - link_f*self.r)
        uncensored = (1-c)*self.r*(y_link_f_r - 1)/(link_f*(1 + y_link_f_r))
        censored = c*(self.r*y_link_f_r/(link_f*y_link_f_r + link_f))
        return uncensored + censored
        # return uncensored
    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
        The hessian will be 0 unless i == j

        .. math::

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
        :rtype: Nx1 array

        .. Note::
            Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i), not on link(f_{j!=i}))
        """
        # c = Y_metadata['censored']
        # c = np.zeros((y.shape[0],))
        c = np.zeros_like(link_f)

        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        y_link_f = y/link_f
        y_link_f_r = y_link_f**self.r

        #In terms of link_f
        censored = c*(-self.r*y_link_f_r*(y_link_f_r + self.r + 1)/((link_f**2)*(y_link_f_r + 1)**2))
        uncensored = (1-c)*(-self.r*(2*self.r*y_link_f_r + y_link_f**(2*self.r) - 1)/((link_f**2)*(1 + y_link_f_r)**2))
        hess = censored + uncensored
        return hess
    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

        .. math::

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
        # c = Y_metadata['censored']
        # for debugging
        # c = np.zeros((y.shape[0],))
        c = np.zeros_like(link_f)

        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']
        y_link_f = y/link_f
        y_link_f_r = y_link_f**self.r

        #In terms of link_f
        censored = c*(self.r*y_link_f_r*(((self.r**2)*(-(y_link_f_r - 1))) + 3*self.r*(y_link_f_r + 1) + 2*(y_link_f_r + 1)**2)
                      / ((link_f**3)*(y_link_f_r + 1)**3))
        uncensored = (1-c)*(2*self.r*(-(self.r**2)*(y_link_f_r - 1)*y_link_f_r + 3*self.r*(y_link_f_r + 1)*y_link_f_r + (y_link_f_r - 1)*(y_link_f_r + 1)**2)
                            / ((link_f**3)*(y_link_f_r + 1)**3))

        d3lik_dlink3 = censored + uncensored
        return d3lik_dlink3
    def dlogpdf_link_dr(self, inv_link_f, y, Y_metadata=None):
        """
        Gradient of the log-likelihood function at y given f, w.r.t the shape parameter

        .. math::

        :param inv_link_f: latent variables link(f)
        :type inv_link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the log likelihood evaluated at points f, w.r.t the shape parameter
        :rtype: Nx1 array
        """
        # c = Y_metadata['censored']
        # c = np.zeros((y.shape[0],))
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        link_f = inv_link_f #FIXME: Change names consistently...
        y_link_f = y/link_f
        log_y_link_f = np.log(y) - np.log(link_f)
        y_link_f_r = y_link_f**self.r

        #In terms of link_f
        censored = c*(-y_link_f_r*log_y_link_f/(1 + y_link_f_r))
        uncensored = (1-c)*(1./self.r + np.log(y) - np.log(link_f) - (2*y_link_f_r*log_y_link_f)/(1 + y_link_f_r))

        dlogpdf_dr = censored + uncensored
        return dlogpdf_dr
    def dlogpdf_dlink_dr(self, inv_link_f, y, Y_metadata=None):
        """
        Derivative of dlogpdf_dlink w.r.t the shape parameter

        .. math::

        :param inv_link_f: latent variables inv_link_f
        :type inv_link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the gradient of the log likelihood evaluated at points f, w.r.t the shape parameter
        :rtype: Nx1 array
        """
        # c = np.zeros((y.shape[0],))
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']
        link_f = inv_link_f
        y_link_f = y/link_f
        y_link_f_r = y_link_f**self.r
        log_y_link_f = np.log(y) - np.log(link_f)

        #In terms of link_f
        censored = c*(y_link_f_r*(y_link_f_r + self.r*log_y_link_f + 1)/(link_f*(y_link_f_r + 1)**2))
        uncensored = (1-c)*(y_link_f**(2*self.r) + 2*self.r*y_link_f_r*log_y_link_f - 1)/(link_f*(1 + y_link_f_r)**2)

        # dlogpdf_dlink_dr = uncensored
        dlogpdf_dlink_dr = censored + uncensored
        return dlogpdf_dlink_dr
    def d2logpdf_dlink2_dr(self, inv_link_f, y, Y_metadata=None):
        """
        Gradient of the hessian (d2logpdf_dlink2) w.r.t the shape parameter

        .. math::

        :param inv_link_f: latent variables link(f)
        :type inv_link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the hessian evaluated at points f and f_j, w.r.t the shape parameter
        :rtype: Nx1 array
        """
        # c = Y_metadata['censored']

        # c = np.zeros((y.shape[0],))
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']
        link_f = inv_link_f
        y_link_f = y/link_f
        y_link_f_r = y_link_f**self.r
        log_y_link_f = np.log(y) - np.log(link_f)

        #In terms of link_f
        y_link_f_2r = y_link_f**(2*self.r)
        denom2 = (link_f**2)*(1 + y_link_f_r)**2
        denom3 = (link_f**2)*(1 + y_link_f_r)**3

        censored = c*(-((y_link_f_r + self.r + 1)*y_link_f_r)/denom2
                      - (self.r*(y_link_f_r + self.r + 1)*y_link_f_r*log_y_link_f)/denom2
                      - (self.r*y_link_f_r*(y_link_f_r*log_y_link_f + 1))/denom2
                      + (2*self.r*(y_link_f_r + self.r + 1)*y_link_f_2r*log_y_link_f)/denom3
                      )

        uncensored = (1-c)*(-(2*self.r*y_link_f_r + y_link_f_2r - 1)/denom2
                            - (self.r*(2*y_link_f_r + 2*self.r*y_link_f_r*log_y_link_f + 2*y_link_f_2r*log_y_link_f)/denom2)
                            + (2*self.r*(2*self.r*y_link_f_r + y_link_f_2r - 1)*y_link_f_r*log_y_link_f)/denom3
                            )
        d2logpdf_dlink2_dr = censored + uncensored

        return d2logpdf_dlink2_dr
    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
        dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
        dlogpdf_dtheta[0, :, :] = self.dlogpdf_link_dr(f, y, Y_metadata=Y_metadata)
        return dlogpdf_dtheta

    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
        dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
        dlogpdf_dlink_dtheta[0, :, :] = self.dlogpdf_dlink_dr(f, y, Y_metadata=Y_metadata)
        return dlogpdf_dlink_dtheta

    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
        d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
        d2logpdf_dlink2_dtheta[0,:, :] = self.d2logpdf_dlink2_dr(f, y, Y_metadata=Y_metadata)
        return d2logpdf_dlink2_dtheta
    def update_gradients(self, grads):
        """
        Pull out the gradients; be careful as the order must match the order
        in which the parameters are added
        """
        self.r.gradient = grads[0]

    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.

        :param gp: latent variable
        """
        orig_shape = gp.shape
        gp = gp.flatten()
        #rs = np.ones_like(gp)*self.r
        #scales = np.ones_like(gp)*np.sqrt(self.sigma2)
        #Ysim = sp.stats.fisk.rvs(rs, scale=self.gp_link.transf(gp))
        Ysim = np.array([sp.stats.fisk.rvs(self.r, loc=0, scale=self.gp_link.transf(f)) for f in gp])
        #np.random.fisk(self.gp_link.transf(gp), c=self.r)
        return Ysim.reshape(orig_shape)
322
GPy/likelihoods/weibull.py
Normal file
@ -0,0 +1,322 @@
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import stats, special
import scipy as sp
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from . import link_functions
from .likelihood import Likelihood


class Weibull(Likelihood):
    """
    Implements the Weibull likelihood function, with optional censoring.
    """

    def __init__(self, gp_link=None, beta=1.):
        if gp_link is None:
            # Parameterised not as link_f but as f
            # gp_link = link_functions.Identity()
            # Parameterised as link_f
            gp_link = link_functions.Log()
        super(Weibull, self).__init__(gp_link, name='Weibull')

        self.r = Param('r_weibull_shape', float(beta), Logexp())
        self.link_parameter(self.r)
    def pdf_link(self, link_f, y, Y_metadata=None):
        """
        Likelihood function given link(f)

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: likelihood evaluated for this point
        :rtype: float
        """
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        c = np.zeros((link_f.shape[0],))

        # log_objective = np.log(self.r) + (self.r - 1) * np.log(y) - link_f - (np.exp(-link_f) * (y ** self.r))
        # log_objective = stats.weibull_min.pdf(y,c=self.beta,loc=link_f,scale=1.)
        log_objective = self.logpdf_link(link_f, y, Y_metadata)
        return np.exp(log_objective)

    def logpdf_link(self, link_f, y, Y_metadata=None):
        """
        Log likelihood function given link(f)

        .. math::
            \\ln p(y_{i}|\\lambda(f_{i}), z_{i}) = (1-z_{i})\\left(\\ln r - \\ln \\lambda(f_{i}) + (r-1)\\ln y_{i} - \\frac{y_{i}^{r}}{\\lambda(f_{i})}\\right) - z_{i}\\frac{y_{i}^{r}}{\\lambda(f_{i})}

        :param link_f: latent variables (link(f))
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: log likelihood evaluated for this point
        :rtype: float
        """
        # alpha = self.gp_link.transf(gp)*self.beta sum(log(a) + (a-1).*log(y)- f - exp(-f).*y.^a)
        # return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
        assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        # uncensored = (1-c)* (np.log(self.r) + (self.r - 1) * np.log(y) - link_f - (np.exp(-link_f) * (y ** self.r)))
        # censored = (-c)*np.exp(-link_f)*(y**self.r)
        uncensored = (1-c)*(np.log(self.r) - np.log(link_f) + (self.r-1)*np.log(y) - y**self.r/link_f)
        censored = -c*y**self.r/link_f

        log_objective = uncensored + censored
        return log_objective
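# A quick standalone consistency check (an illustrative aside, not part of the file): the
# uncensored term above is a Weibull log pdf with shape r once the scale is read as
# lambda = scale**r, and the censored term is the matching log survival function.
import numpy as np
from scipy import stats
r, lam, y = 1.5, 2.0, 0.7
uncensored = np.log(r) - np.log(lam) + (r - 1)*np.log(y) - y**r/lam
assert np.isclose(uncensored, stats.weibull_min.logpdf(y, r, scale=lam**(1./r)))
censored = -y**r/lam
assert np.isclose(censored, stats.weibull_min.logsf(y, r, scale=lam**(1./r)))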
    def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
        """
        Gradient of the log likelihood function at y, given link(f), w.r.t link(f)

        .. math::
            \\frac{d \\ln p(y_{i}|\\lambda(f_{i}), z_{i})}{d\\lambda(f_{i})} = (1-z_{i})\\left(-\\frac{1}{\\lambda(f_{i})} + \\frac{y_{i}^{r}}{\\lambda(f_{i})^{2}}\\right) + z_{i}\\frac{y_{i}^{r}}{\\lambda(f_{i})^{2}}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: gradient of the log likelihood evaluated at points link(f)
        :rtype: Nx1 array
        """
        # grad = (1. - self.beta) / (y - link_f)
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        # uncensored = (1-c)* ( -1 + np.exp(-link_f)*(y ** self.r))
        # censored = c*np.exp(-link_f)*(y**self.r)
        uncensored = (1-c)*(-1/link_f + y**self.r/link_f**2)
        censored = c*y**self.r/link_f**2
        grad = uncensored + censored
        return grad
    def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
        """
        Hessian at y, given link(f), w.r.t link(f)
        i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
        The hessian will be 0 unless i == j

        .. math::
            \\frac{d^{2} \\ln p(y_{i}|\\lambda(f_{i}), z_{i})}{d\\lambda(f_{i})^{2}} = (1-z_{i})\\left(\\frac{1}{\\lambda(f_{i})^{2}} - \\frac{2y_{i}^{r}}{\\lambda(f_{i})^{3}}\\right) - z_{i}\\frac{2y_{i}^{r}}{\\lambda(f_{i})^{3}}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
        :rtype: Nx1 array

        .. Note::
            Will return diagonal of hessian, since everywhere else it is 0, as the likelihood factorizes over cases
            (the distribution for y_i depends only on link(f_i), not on link(f_{j!=i}))
        """
        # hess = (self.beta - 1.) / (y - link_f)**2
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        # uncensored = (1-c)* (-(y ** self.r) * np.exp(-link_f))
        # censored = -c*np.exp(-link_f)*y**self.r
        uncensored = (1-c)*(1/link_f**2 - 2*y**self.r/link_f**3)
        censored = -c*2*y**self.r/link_f**3
        hess = uncensored + censored
        # hess = -(y ** self.r) * np.exp(-link_f)
        return hess
    def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
        """
        Third order derivative log-likelihood function at y given link(f) w.r.t link(f)

        .. math::
            \\frac{d^{3} \\ln p(y_{i}|\\lambda(f_{i}), z_{i})}{d\\lambda(f_{i})^{3}} = (1-z_{i})\\left(-\\frac{2}{\\lambda(f_{i})^{3}} + \\frac{6y_{i}^{r}}{\\lambda(f_{i})^{4}}\\right) + z_{i}\\frac{6y_{i}^{r}}{\\lambda(f_{i})^{4}}

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: third derivative of likelihood evaluated at points f
        :rtype: Nx1 array
        """
        # d3lik_dlink3 = (1. - self.beta) / (y - link_f)**3

        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']
        # uncensored = (1-c)* ((y ** self.r) * np.exp(-link_f))
        # censored = c*np.exp(-link_f)*y**self.r
        uncensored = (1-c)*(-2/link_f**3 + 6*y**self.r/link_f**4)
        censored = c*6*y**self.r/link_f**4

        d3lik_dlink3 = uncensored + censored
        # d3lik_dlink3 = (y ** self.r) * np.exp(-link_f)
        return d3lik_dlink3
    def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None):
        return np.zeros(self.size)
    def dlogpdf_link_dr(self, inv_link_f, y, Y_metadata=None):
        """
        Gradient of the log-likelihood function at y given f, w.r.t the shape parameter r

        .. math::

        :param inv_link_f: latent variables link(f)
        :type inv_link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the log likelihood evaluated at points f, w.r.t the shape parameter
        :rtype: Nx1 array
        """
        c = np.zeros_like(y)
        link_f = inv_link_f
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']
        uncensored = (1-c)*(1./self.r + np.log(y) - y**self.r*np.log(y)/link_f)
        censored = (-c*y**self.r*np.log(y)/link_f)
        dlogpdf_dr = uncensored + censored
        return dlogpdf_dr
    def dlogpdf_dlink_dr(self, inv_link_f, y, Y_metadata=None):
        """
        Derivative of dlogpdf_dlink w.r.t the shape parameter r

        :param inv_link_f: latent variables link(f)
        :type inv_link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the gradient of the log likelihood, w.r.t the shape parameter
        :rtype: Nx1 array
        """
        # dlogpdf_dlink_dr = self.beta * y**(self.beta - 1) * np.exp(-link_f)
        # dlogpdf_dlink_dr = np.exp(-link_f) * (y ** self.r) * np.log(y)
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        link_f = inv_link_f
        # uncensored = (1-c)*(np.exp(-link_f)* (y ** self.r) * np.log(y))
        # censored = c*np.exp(-link_f)*(y**self.r)*np.log(y)
        uncensored = (1-c)*(y**self.r*np.log(y)/link_f**2)
        censored = c*(y**self.r*np.log(y)/link_f**2)
        dlogpdf_dlink_dr = uncensored + censored
        return dlogpdf_dlink_dr
    def d2logpdf_dlink2_dr(self, link_f, y, Y_metadata=None):
        """
        Derivative of the hessian (d2logpdf_dlink2) w.r.t the shape parameter r

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the hessian w.r.t the shape parameter
        :rtype: Nx1 array
        """

        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        # uncensored = (1-c)*( -np.exp(-link_f)* (y ** self.r) * np.log(y))
        # censored = -c*np.exp(-link_f)*(y**self.r)*np.log(y)
        uncensored = (1-c)*-2*y**self.r*np.log(y)/link_f**3
        censored = c*-2*y**self.r*np.log(y)/link_f**3
        d2logpdf_dlink_dr = uncensored + censored

        return d2logpdf_dlink_dr
    def d3logpdf_dlink3_dr(self, link_f, y, Y_metadata=None):
        """
        Derivative of d3logpdf_dlink3 w.r.t the shape parameter r

        :param link_f: latent variables link(f)
        :type link_f: Nx1 array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: derivative of the third derivative w.r.t the shape parameter
        :rtype: Nx1 array
        """
        c = np.zeros_like(y)
        if Y_metadata is not None and 'censored' in Y_metadata.keys():
            c = Y_metadata['censored']

        uncensored = (1-c)*((y**self.r)*np.exp(-link_f)*np.log1p(y))
        censored = c*np.exp(-link_f)*(y**self.r)*np.log(y)
        d3logpdf_dlink3_dr = uncensored + censored
        return d3logpdf_dlink3_dr
    def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
        """
        :param f: latent variables link(f)
        :type f: NxD array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: stack of derivatives of the log likelihood w.r.t each linked parameter (here only r)
        :rtype: (num_params)xNxD array
        """
        dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
        dlogpdf_dtheta[0, :, :] = self.dlogpdf_link_dr(f, y, Y_metadata=Y_metadata)
        return dlogpdf_dtheta

    def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
        """
        :param f: latent variables link(f)
        :type f: NxD array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: stack of derivatives of dlogpdf_dlink w.r.t each linked parameter
        :rtype: (num_params)xNxD array
        """
        dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
        dlogpdf_dlink_dtheta[0, :, :] = self.dlogpdf_dlink_dr(f, y, Y_metadata)
        return dlogpdf_dlink_dtheta

    def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
        """
        :param f: latent variables link(f)
        :type f: NxD array
        :param y: data
        :type y: Nx1 array
        :param Y_metadata: includes censoring information in dictionary key 'censored'
        :returns: stack of derivatives of d2logpdf_dlink2 w.r.t each linked parameter
        :rtype: (num_params)xNxD array
        """
        d2logpdf_dlink_dtheta2 = np.zeros((self.size, f.shape[0], f.shape[1]))
        d2logpdf_dlink_dtheta2[0, :, :] = self.d2logpdf_dlink2_dr(f, y, Y_metadata)
        return d2logpdf_dlink_dtheta2
    def update_gradients(self, grads):
        """
        Pull out the gradients; be careful as the order must match the order
        in which the parameters are added
        """
        self.r.gradient = grads[0]

    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations conditioned on a given value of the latent variable f.

        :param gp: latent variable
        """
        orig_shape = gp.shape
        gp = gp.flatten()
        weibull_samples = np.array([sp.stats.weibull_min.rvs(self.r, loc=0, scale=self.gp_link.transf(f)) for f in gp])
        return weibull_samples.reshape(orig_shape)
@ -38,3 +38,9 @@ class Constant(Mapping):

    def gradients_X(self, dL_dF, X):
        return np.zeros_like(X)

    def to_dict(self):
        input_dict = super(Constant, self)._to_dict()
        input_dict["class"] = "GPy.mappings.Constant"
        input_dict["value"] = self.C.values[0]
        return input_dict
@ -19,8 +19,7 @@ class Identity(Mapping):
    def gradients_X(self, dL_dF, X):
        return dL_dF

    def to_dict(self):
        input_dict = super(Identity, self)._to_dict()
        input_dict["class"] = "GPy.mappings.Identity"
        return input_dict
@ -37,3 +37,21 @@ class Linear(Mapping):

    def gradients_X(self, dL_dF, X):
        return np.dot(dL_dF, self.A.T)

    def to_dict(self):
        input_dict = super(Linear, self)._to_dict()
        input_dict["class"] = "GPy.mappings.Linear"
        input_dict["A"] = self.A.values.tolist()
        return input_dict

    @staticmethod
    def _from_dict(mapping_class, input_dict):
        import copy
        input_dict = copy.deepcopy(input_dict)
        A = np.array(input_dict.pop('A'))
        l = Linear(**input_dict)
        l.unlink_parameter(l.A)
        l.update_model(False)
        l.A = Param('A', A)
        l.link_parameter(l.A)
        return l
@ -35,7 +35,7 @@ class MLP(Mapping):

        # Backpropagation to hidden layer.
        dL_dact = np.dot(dL_dF, self.W2.T)
        dL_dlayer1 = dL_dact / np.square(np.cosh(layer1))
        dL_dlayer1 = dL_dact * (1 - np.square(activations))

        # Finally, evaluate the first-layer gradients.
        self.W1.gradient = np.dot(X.T,dL_dlayer1)

@ -47,7 +47,7 @@ class MLP(Mapping):

        # Backpropagation to hidden layer.
        dL_dact = np.dot(dL_dF, self.W2.T)
        dL_dlayer1 = dL_dact / np.square(np.cosh(layer1))
        dL_dlayer1 = dL_dact * (1 - np.square(activations))

        return np.dot(dL_dlayer1, self.W1.T)
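# A minimal numeric check of the identity behind the change above: with a tanh hidden
# layer where activations = tanh(layer1), 1/cosh(x)**2 equals 1 - tanh(x)**2, so the new
# expression reuses the stored activations instead of recomputing cosh.
import numpy as np
layer1 = np.random.randn(4, 3)
activations = np.tanh(layer1)
assert np.allclose(1./np.square(np.cosh(layer1)), 1. - np.square(activations))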
@ -9,6 +9,7 @@ from .gplvm import GPLVM
from .bcgplvm import BCGPLVM
from .sparse_gplvm import SparseGPLVM
from .warped_gp import WarpedGP
from .input_warped_gp import InputWarpedGP
from .bayesian_gplvm import BayesianGPLVM
from .mrd import MRD
from .gradient_checker import GradientChecker, HessianChecker, SkewChecker
@ -4,6 +4,7 @@
from ..core import GP
from .. import likelihoods
from .. import kern
import numpy as np
from ..inference.latent_function_inference.expectation_propagation import EP

class GPClassification(GP):
@ -27,3 +28,23 @@ class GPClassification(GP):
        likelihood = likelihoods.Bernoulli()

        GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), mean_function=mean_function, name='gp_classification')

    @staticmethod
    def from_gp(gp):
        from copy import deepcopy
        gp = deepcopy(gp)
        return GPClassification(gp.X, gp.Y, gp.kern, gp.likelihood, gp.inference_method, gp.mean_function, name='gp_classification')

    def to_dict(self, save_data=True):
        model_dict = super(GPClassification,self).to_dict(save_data)
        model_dict["class"] = "GPy.models.GPClassification"
        return model_dict

    @staticmethod
    def from_dict(input_dict, data=None):
        import GPy
        m = GPy.core.model.Model.from_dict(input_dict, data)
        return GPClassification.from_gp(m)

    def save_model(self, output_filename, compress=True, save_data=True):
        self._save_model(output_filename, compress=compress, save_data=save_data)
@ -35,3 +35,23 @@ class GPRegression(GP):

        super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)

    @staticmethod
    def from_gp(gp):
        from copy import deepcopy
        gp = deepcopy(gp)
        return GPRegression(gp.X, gp.Y, gp.kern, gp.Y_metadata, gp.normalizer, gp.likelihood.variance.values, gp.mean_function)

    def to_dict(self, save_data=True):
        model_dict = super(GPRegression,self).to_dict(save_data)
        model_dict["class"] = "GPy.models.GPRegression"
        return model_dict

    @staticmethod
    def _from_dict(input_dict, data=None):
        import GPy
        input_dict["class"] = "GPy.core.GP"
        m = GPy.core.GP.from_dict(input_dict, data)
        return GPRegression.from_gp(m)

    def save_model(self, output_filename, compress=True, save_data=True):
        self._save_model(output_filename, compress=compress, save_data=save_data)
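# A minimal save/load sketch using the methods added above (an illustrative aside with
# toy data; assumes GPy is importable and that Model.from_dict dispatches on the "class"
# entry to reach GPRegression._from_dict):
import numpy as np
import GPy
X = np.random.rand(20, 1)
Y = np.sin(X) + 0.05*np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)
d = m.to_dict(save_data=True)            # plain, JSON-serializable dict
m2 = GPy.core.model.Model.from_dict(d)   # rebuilds a GPRegression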
149
GPy/models/input_warped_gp.py
Normal file
@ -0,0 +1,149 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np

from ..core import GP
from .. import likelihoods
from ..util.input_warping_functions import KumarWarping
from .. import kern


class InputWarpedGP(GP):
    """Input Warped GP

    This defines a GP model that applies a warping function to the input.
    By default, it uses Kumar warping (the CDF of the Kumaraswamy distribution).

    Parameters
    ----------
    X : array_like, shape = (n_samples, n_features)
        Input data

    Y : array_like, shape = (n_samples, 1)
        Output data

    kernel : object, optional
        An instance of a kernel function defined in GPy.kern.
        Defaults to Matern 32.

    warping_function : object, optional
        An instance of a warping function defined in GPy.util.input_warping_functions.
        Defaults to KumarWarping.

    warping_indices : list of int, optional
        A list of indices of which features in X should be warped.
        It is used in the Kumar warping function.

    normalizer : bool, optional
        Whether to normalize the output.

    Xmin : list of float, optional
        The min values for every feature in X.
        It is used in the Kumar warping function.

    Xmax : list of float, optional
        The max values for every feature in X.
        It is used in the Kumar warping function.

    epsilon : float, optional
        We normalize X to [0+e, 1-e]. If not given, the default value defined in the
        KumarWarping function is used.

    Attributes
    ----------
    X_untransformed : array_like, shape = (n_samples, n_features)
        A copy of the original input X

    X_warped : array_like, shape = (n_samples, n_features)
        Input data after warping

    warping_function : object, optional
        An instance of a warping function defined in GPy.util.input_warping_functions.
        Defaults to KumarWarping.

    Notes
    -----
    Kumar warping uses the CDF of the Kumaraswamy distribution. More on the Kumaraswamy
    distribution can be found at the wiki page:
    https://en.wikipedia.org/wiki/Kumaraswamy_distribution

    References
    ----------
    Snoek, J.; Swersky, K.; Zemel, R. S. & Adams, R. P.
    Input Warping for Bayesian Optimization of Non-stationary Functions
    preprint arXiv:1402.0929, 2014
    """
    def __init__(self, X, Y, kernel=None, normalizer=False, warping_function=None, warping_indices=None, Xmin=None, Xmax=None, epsilon=None):
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        self.X_untransformed = X.copy()

        if kernel is None:
            kernel = kern.sde_Matern32(X.shape[1], variance=1.)
        self.kernel = kernel

        if warping_function is None:
            self.warping_function = KumarWarping(self.X_untransformed, warping_indices, epsilon, Xmin, Xmax)
        else:
            self.warping_function = warping_function

        self.X_warped = self.transform_data(self.X_untransformed)
        likelihood = likelihoods.Gaussian()
        super(InputWarpedGP, self).__init__(self.X_warped, Y, likelihood=likelihood, kernel=kernel, normalizer=normalizer)

        # Add the parameters of the warping function to the model parameter hierarchy
        self.link_parameter(self.warping_function)

    def parameters_changed(self):
        """Update the gradients of the parameters of the warping function

        This method is called when there are new values for the parameters of the
        warping function, the kernels and the other parameters of a normal GP
        """
        # use the warped X to update
        self.X = self.transform_data(self.X_untransformed)
        super(InputWarpedGP, self).parameters_changed()
        # the gradient of the log likelihood w.r.t. the input AFTER warping is the product of dL_dK and dK_dX
        dL_dX = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X)
        self.warping_function.update_grads(self.X_untransformed, dL_dX)
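# A small sketch of the Kumar warp this model applies by default (assuming the standard
# Kumaraswamy-CDF parameterisation with per-dimension parameters a, b > 0; the exact
# internals of KumarWarping may differ): inputs are normalised to (0, 1) and then warped
# monotonically.
import numpy as np

def kumar_warp(x, a, b):
    # CDF of the Kumaraswamy distribution on (0, 1)
    return 1. - (1. - x**a)**b

x = np.linspace(0.01, 0.99, 5)
print(kumar_warp(x, a=2.0, b=0.5))   # monotone; concavity depends on a and b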
    def transform_data(self, X, test_data=False):
        """Apply warping_function to some input data

        Parameters
        ----------
        X : array_like, shape = (n_samples, n_features)

        test_data: bool, optional
            Default to False, should be set to True when transforming test data
        """
        return self.warping_function.f(X, test_data)

    def log_likelihood(self):
        """Compute the marginal log likelihood

        For input warping, just use the normal GP log likelihood
        """
        return GP.log_likelihood(self)
    def predict(self, Xnew):
        """Prediction on the new data

        Parameters
        ----------
        Xnew : array_like, shape = (n_samples, n_features)
            The test data.

        Returns
        -------
        mean : array_like, shape = (n_samples, output.dim)
            Posterior mean at the location of Xnew

        var : array_like, shape = (n_samples, 1)
            Posterior variance at the location of Xnew
        """
        Xnew_warped = self.transform_data(Xnew, test_data=True)
        mean, var = super(InputWarpedGP, self).predict(Xnew_warped, kern=self.kernel, full_cov=False)
        return mean, var

if __name__ == '__main__':
    X = np.random.randn(100, 1)
    Y = np.sin(X) + np.random.randn(100, 1)*0.05
    m = InputWarpedGP(X, Y)
@ -65,7 +65,7 @@ def plot_ARD(kernel, filtering=None, legend=False, canvas=None, **kwargs):

    if canvas is None:
        canvas, kwargs = pl().new_canvas(xlim=(-.5, kernel._effective_input_dim-.5), xlabel='input dimension', ylabel='sensitivity', **kwargs)
        canvas, kwargs = pl().new_canvas(xlim=(-.5, kernel._effective_input_dim-.5), xlabel='input dimension', ylabel='ard contribution', **kwargs)

    for i in range(ard_params.shape[0]):
        if parts[i].name in filtering:
147
GPy/testing/ep_likelihood_tests.py
Normal file
@ -0,0 +1,147 @@

import numpy as np
import unittest
import GPy
from GPy.models import GradientChecker

fixed_seed = 10
from nose.tools import with_setup, nottest


# This file contains some high-level tests; this is not unit testing, but it gives us a
# higher-level estimate of whether things are going well under the hood.
class TestObservationModels(unittest.TestCase):
    def setUp(self):
        np.random.seed(fixed_seed)
        self.N = 100
        self.D = 2
        self.X = np.random.rand(self.N, self.D)

        self.real_noise_std = 0.05
        noise = np.random.randn(*self.X[:, 0].shape) * self.real_noise_std
        self.Y = (np.sin(self.X[:, 0] * 2 * np.pi) + noise)[:, None]
        self.num_points = self.X.shape[0]
        self.f = np.random.rand(self.N, 1)
        self.binary_Y = np.asarray(np.random.rand(self.N) > 0.5, dtype=np.int)[:, None]
        # self.binary_Y[self.binary_Y == 0.0] = -1.0
        self.positive_Y = np.exp(self.Y.copy())

        self.Y_noisy = self.Y.copy()
        self.Y_verynoisy = self.Y.copy()
        self.Y_noisy[75] += 1.3

        self.init_var = 0.15
        self.deg_free = 4.
        censored = np.zeros_like(self.Y)
        random_inds = np.random.choice(self.N, int(self.N / 2), replace=True)
        censored[random_inds] = 1
        self.Y_metadata = dict()
        self.Y_metadata['censored'] = censored
        self.kernel1 = GPy.kern.RBF(self.X.shape[1]) + GPy.kern.White(self.X.shape[1])

    def tearDown(self):
        self.Y = None
        self.X = None
        self.binary_Y = None
        self.positive_Y = None
        self.kernel1 = None
@with_setup(setUp, tearDown)
|
||||
def testEPClassification(self):
|
||||
bernoulli = GPy.likelihoods.Bernoulli()
|
||||
laplace_inf = GPy.inference.latent_function_inference.Laplace()
|
||||
|
||||
ep_inf_alt = GPy.inference.latent_function_inference.EP(ep_mode='alternated')
|
||||
ep_inf_nested = GPy.inference.latent_function_inference.EP(ep_mode='nested')
|
||||
ep_inf_fractional = GPy.inference.latent_function_inference.EP(ep_mode='nested', eta=0.9)
|
||||
|
||||
m1 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=laplace_inf)
|
||||
m1.randomize()
|
||||
|
||||
m2 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_alt)
|
||||
m2.randomize()
|
||||
|
||||
m3 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_nested)
|
||||
m3.randomize()
|
||||
#
|
||||
m4 = GPy.core.GP(self.X, self.binary_Y.copy(), kernel=self.kernel1.copy(), likelihood=bernoulli.copy(), inference_method=ep_inf_fractional)
|
||||
m4.randomize()
|
||||
|
||||
optimizer = 'bfgs'
|
||||
|
||||
#do gradcheck here ...
|
||||
# self.assertTrue(m1.checkgrad())
|
||||
# self.assertTrue(m2.checkgrad())
|
||||
# self.assertTrue(m3.checkgrad())
|
||||
# self.assertTrue(m4.checkgrad())
|
||||
|
||||
m1.optimize(optimizer=optimizer, max_iters=300)
|
||||
m2.optimize(optimizer=optimizer, max_iters=300)
|
||||
m3.optimize(optimizer=optimizer, max_iters=300)
|
||||
m4.optimize(optimizer=optimizer, max_iters=300)
|
||||
|
||||
# taking laplace predictions as the ground truth
|
||||
probs_mean_lap, probs_var_lap = m1.predict(self.X)
|
||||
probs_mean_ep_alt, probs_var_ep_alt = m2.predict(self.X)
|
||||
probs_mean_ep_nested, probs_var_ep_nested = m3.predict(self.X)
|
||||
|
||||
# for simple single dimension data , marginal likelihood for laplace and EP approximations should not be so far apart.
|
||||
self.assertAlmostEqual(m1.log_likelihood(), m2.log_likelihood(),delta=1)
|
||||
self.assertAlmostEqual(m1.log_likelihood(), m3.log_likelihood(), delta=1)
|
||||
self.assertAlmostEqual(m1.log_likelihood(), m4.log_likelihood(), delta=5)
|
||||
|
||||
GPy.util.classification.conf_matrix(probs_mean_lap, self.binary_Y)
|
||||
GPy.util.classification.conf_matrix(probs_mean_ep_alt, self.binary_Y)
|
||||
GPy.util.classification.conf_matrix(probs_mean_ep_nested, self.binary_Y)
|
||||
|
||||
@nottest
|
||||
def rmse(self, Y, Ystar):
|
||||
return np.sqrt(np.mean((Y - Ystar) ** 2))
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def test_EP_with_StudentT(self):
|
||||
studentT = GPy.likelihoods.StudentT(deg_free=self.deg_free, sigma2=self.init_var)
|
||||
laplace_inf = GPy.inference.latent_function_inference.Laplace()
|
||||
|
||||
ep_inf_alt = GPy.inference.latent_function_inference.EP(ep_mode='alternated')
|
||||
ep_inf_nested = GPy.inference.latent_function_inference.EP(ep_mode='nested')
|
||||
ep_inf_frac = GPy.inference.latent_function_inference.EP(ep_mode='nested', eta=0.7)
|
||||
|
||||
m1 = GPy.core.GP(self.X.copy(), self.Y_noisy.copy(), kernel=self.kernel1.copy(), likelihood=studentT.copy(), inference_method=laplace_inf)
|
||||
# optimize
|
||||
m1['.*white'].constrain_fixed(1e-5)
|
||||
m1.randomize()
|
||||
|
||||
m2 = GPy.core.GP(self.X.copy(), self.Y_noisy.copy(), kernel=self.kernel1.copy(), likelihood=studentT.copy(), inference_method=ep_inf_alt)
|
||||
m2['.*white'].constrain_fixed(1e-5)
|
||||
# m2.constrain_bounded('.*t_scale2', 0.001, 10)
|
||||
m2.randomize()
|
||||
|
||||
# m3 = GPy.core.GP(self.X, self.Y_noisy.copy(), kernel=self.kernel1, likelihood=studentT.copy(), inference_method=ep_inf_nested)
|
||||
# m3['.*white'].constrain_fixed(1e-5)
|
||||
# # m3.constrain_bounded('.*t_scale2', 0.001, 10)
|
||||
# m3.randomize()
|
||||
|
||||
optimizer='bfgs'
|
||||
m1.optimize(optimizer=optimizer,max_iters=400)
|
||||
m2.optimize(optimizer=optimizer, max_iters=400)
|
||||
# m3.optimize(optimizer=optimizer, max_iters=500)
|
||||
|
||||
self.assertAlmostEqual(m1.log_likelihood(), m2.log_likelihood(),delta=200)
|
||||
|
||||
# self.assertAlmostEqual(m1.log_likelihood(), m3.log_likelihood(), 3)
|
||||
|
||||
preds_mean_lap, preds_var_lap = m1.predict(self.X)
|
||||
preds_mean_alt, preds_var_alt = m2.predict(self.X)
|
||||
# preds_mean_nested, preds_var_nested = m3.predict(self.X)
|
||||
rmse_lap = self.rmse(preds_mean_lap, self.Y)
|
||||
rmse_alt = self.rmse(preds_mean_alt, self.Y)
|
||||
# rmse_nested = self.rmse(preds_mean_nested, self.Y_noisy)
|
||||
|
||||
if rmse_alt > rmse_lap:
|
||||
self.assertAlmostEqual(rmse_lap, rmse_alt, delta=1.5)
|
||||
# m3.optimize(optimizer=optimizer, max_iters=500)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
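As a quick reference for the EP variants exercised above, a rough gloss: the constructor arguments are exactly those used in the tests, while the comments are our reading of what the modes do.

from GPy.inference.latent_function_inference import EP

ep_alt    = EP(ep_mode='alternated')        # alternate EP updates with hyperparameter steps
ep_nested = EP(ep_mode='nested')            # run EP to convergence inside each objective evaluation
ep_frac   = EP(ep_mode='nested', eta=0.9)   # fractional (power) EP: sites raised to the power eta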
@@ -61,6 +61,18 @@ class InferenceGPEP(unittest.TestCase):
         Y = lik.samples(f).reshape(-1,1)
         return X, Y

+    def genNoisyData(self):
+        np.random.seed(1)
+        X = np.random.rand(100,1)
+        self.real_std = 0.1
+        noise = np.random.randn(*X[:, 0].shape)*self.real_std
+        Y = (np.sin(X[:, 0]*2*np.pi) + noise)[:, None]
+        self.f = np.random.rand(X.shape[0],1)
+        Y_extra_noisy = Y.copy()
+        Y_extra_noisy[50] += 4.
+        # Y_extra_noisy[80:83] -= 2.
+        return X, Y, Y_extra_noisy
+
     def test_inference_EP(self):
         from paramz import ObsAr
         X, Y = self.genData()
@ -73,11 +85,45 @@ class InferenceGPEP(unittest.TestCase):
|
|||
inference_method=inf,
|
||||
likelihood=lik)
|
||||
K = self.model.kern.K(X)
|
||||
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self.model.inference_method.expectation_propagation(K, ObsAr(Y), lik, None)
|
||||
|
||||
v_tilde = mu_tilde * tau_tilde
|
||||
p, m, d = self.model.inference_method._inference(K, tau_tilde, v_tilde, lik, Y_metadata=None, Z_tilde=log_Z_tilde.sum())
|
||||
p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(k, X,lik ,mu_tilde[:,None], mean_function=None, variance=1./tau_tilde, K=K, Z_tilde=log_Z_tilde.sum() + np.sum(- 0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)))
|
||||
post_params, ga_approx, cav_params, log_Z_tilde = self.model.inference_method.expectation_propagation(K, ObsAr(Y), lik, None)
|
||||
|
||||
mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
|
||||
p, m, d = self.model.inference_method._inference(Y, K, ga_approx, cav_params, lik, Y_metadata=None, Z_tilde=log_Z_tilde)
|
||||
p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(k, X,lik ,mu_tilde[:,None], mean_function=None, variance=1./ga_approx.tau, K=K, Z_tilde=log_Z_tilde + np.sum(- 0.5*np.log(ga_approx.tau) + 0.5*(ga_approx.v*ga_approx.v*1./ga_approx.tau)))
|
||||
|
||||
assert (np.sum(np.array([m - m0,
|
||||
np.sum(d['dL_dK'] - d0['dL_dK']),
|
||||
np.sum(d['dL_dthetaL'] - d0['dL_dthetaL']),
|
||||
np.sum(d['dL_dm'] - d0['dL_dm']),
|
||||
np.sum(p._woodbury_vector - p0._woodbury_vector),
|
||||
np.sum(p.woodbury_inv - p0.woodbury_inv)])) < 1e6)
|
||||
|
||||
# NOTE: adding a test like above for parameterized likelihood- the above test is
|
||||
# only for probit likelihood which does not have any tunable hyperparameter which is why
|
||||
# the term in dictionary of gradients: dL_dthetaL will always be zero. So here we repeat tests for
|
||||
# student-t likelihood and heterodescastic gaussian noise case. This test simply checks if the posterior
|
||||
# and gradients of log marginal are roughly the same for inference through EP and exact gaussian inference using
|
||||
# the gaussian approximation for the individual likelihood site terms. For probit likelihood, it is possible to
|
||||
# calculate moments analytically, but for other likelihoods, we will need to use numerical quadrature techniques,
|
||||
# and it is possible that any error might creep up because of quadrature implementation.
|
||||
def test_inference_EP_non_classification(self):
|
||||
from paramz import ObsAr
|
||||
X, Y, Y_extra_noisy = self.genNoisyData()
|
||||
deg_freedom = 5.
|
||||
init_noise_var = 0.08
|
||||
lik_studentT = GPy.likelihoods.StudentT(deg_free=deg_freedom, sigma2=init_noise_var)
|
||||
# like_gaussian_noise = GPy.likelihoods.MixedNoise()
|
||||
k = GPy.kern.RBF(1, variance=2., lengthscale=1.1)
|
||||
ep_inf_alt = GPy.inference.latent_function_inference.expectation_propagation.EP(max_iters=4, delta=0.5)
|
||||
# ep_inf_nested = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode='nested', max_iters=100, delta=0.5)
|
||||
m = GPy.core.GP(X=X,Y=Y_extra_noisy,kernel=k,likelihood=lik_studentT,inference_method=ep_inf_alt)
|
||||
K = m.kern.K(X)
|
||||
post_params, ga_approx, cav_params, log_Z_tilde = m.inference_method.expectation_propagation(K, ObsAr(Y_extra_noisy), lik_studentT, None)
|
||||
|
||||
mu_tilde = ga_approx.v / ga_approx.tau.astype(float)
|
||||
p, m, d = m.inference_method._inference(Y_extra_noisy, K, ga_approx, cav_params, lik_studentT, Y_metadata=None, Z_tilde=log_Z_tilde)
|
||||
p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, ep_inf_alt).inference(k, X,lik_studentT ,mu_tilde[:,None], mean_function=None, variance=1./ga_approx.tau, K=K, Z_tilde=log_Z_tilde + np.sum(- 0.5*np.log(ga_approx.tau) + 0.5*(ga_approx.v*ga_approx.v*1./ga_approx.tau)))
|
||||
|
||||
assert (np.sum(np.array([m - m0,
|
||||
np.sum(d['dL_dK'] - d0['dL_dK']),
|
||||
|
|
|
|||
|
|
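The identity behind this check, as we read the test, in the code's notation (log Z̃ = log_Z_tilde, ṽ = ga_approx.v, τ̃ = ga_approx.tau): after EP converges, each likelihood site is replaced by a Gaussian pseudo-observation with mean ṽ_i/τ̃_i and variance 1/τ̃_i, and the EP log marginal equals the exact-Gaussian one on those pseudo-observations plus the site normalizers,

\log Z_{\mathrm{EP}}
  = \log Z_{\mathrm{Gauss}}\!\left(y = \tilde v/\tilde\tau,\ \sigma^2 = 1/\tilde\tau\right)
  + \log \tilde Z
  + \sum_i \left( -\tfrac{1}{2}\log \tilde\tau_i + \frac{\tilde v_i^{\,2}}{2\,\tilde\tau_i} \right)

which is exactly the corrected Z_tilde passed to the super().inference call above.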
@@ -123,6 +123,11 @@ class TestNoiseModels(object):

         self.var = 0.2
         self.deg_free = 4.0
+        censored = np.zeros_like(self.Y)
+        random_inds = np.random.choice(self.N, int(self.N / 2), replace=True)
+        censored[random_inds] = 1
+        self.Y_metadata = dict()
+        self.Y_metadata['censored'] = censored

         #Make a bigger step as lower bound can be quite curved
         self.step = 1e-4
@@ -274,6 +279,20 @@ class TestNoiseModels(object):
                 "Y_metadata": {'trials': self.ns},
                 "laplace": True,
             },
+            "loglogistic_censored": {
+                "model": GPy.likelihoods.LogLogistic(),
+                "link_f_constraints": [self.constrain_positive],
+                "Y": self.positive_Y,
+                "Y_metadata": self.Y_metadata,
+                "laplace": True
+            },
+            "weibull_censored": {
+                "model": GPy.likelihoods.Weibull(),
+                "link_f_constraints": [self.constrain_positive],
+                "Y": self.positive_Y,
+                "Y_metadata": self.Y_metadata,
+                "laplace": True
+            }
             #,
             #GAMMA needs some work!"Gamma_default": {
             #"model": GPy.likelihoods.Gamma(),
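A sketch of how the censoring metadata added above reaches a model. The data is hypothetical; we assume, as the tests do, that a 1 in Y_metadata['censored'] marks a censored observation and that the survival-style likelihoods (LogLogistic, Weibull) read this flag:

import numpy as np
import GPy

N = 50
X = np.random.rand(N, 1)
Y = np.exp(np.random.randn(N, 1) * 0.3)            # positive, survival-style targets
censored = np.zeros_like(Y)
censored[np.random.choice(N, N // 2, replace=False)] = 1

m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1),
                likelihood=GPy.likelihoods.LogLogistic(),
                inference_method=GPy.inference.latent_function_inference.Laplace(),
                Y_metadata={'censored': censored})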
@@ -399,6 +399,68 @@ class MiscTests(unittest.TestCase):
         m.optimize()
         print(m)

+    def test_input_warped_gp_identity(self):
+        """
+        An InputWarpedGP with the identity warping function should be
+        equal to a standard GP.
+        """
+        k = GPy.kern.RBF(1)
+        m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
+        m.optimize()
+        preds = m.predict(self.X)
+
+        warp_k = GPy.kern.RBF(1)
+        warp_f = GPy.util.input_warping_functions.IdentifyWarping()
+        warp_m = GPy.models.InputWarpedGP(self.X, self.Y, kernel=warp_k, warping_function=warp_f)
+        warp_m.optimize()
+        warp_preds = warp_m.predict(self.X)
+
+        np.testing.assert_almost_equal(preds, warp_preds, decimal=4)
+
+    def test_kumar_warping_gradient(self):
+        n_X = 100
+        np.random.seed(0)
+        X = np.random.randn(n_X, 2)
+        Y = np.sum(np.sin(X), 1).reshape(n_X, 1)
+
+        k1 = GPy.kern.Linear(2)
+        m1 = GPy.models.InputWarpedGP(X, Y, kernel=k1)
+        m1.randomize()
+        self.assertEquals(m1.checkgrad(), True)
+
+        k2 = GPy.kern.RBF(2)
+        m2 = GPy.models.InputWarpedGP(X, Y, kernel=k2)
+        m2.randomize()
+        m2.checkgrad()
+        self.assertEquals(m2.checkgrad(), True)
+
+        k3 = GPy.kern.Matern52(2)
+        m3 = GPy.models.InputWarpedGP(X, Y, kernel=k3)
+        m3.randomize()
+        m3.checkgrad()
+        self.assertEquals(m3.checkgrad(), True)
+
+    def test_kumar_warping_parameters(self):
+        np.random.seed(1)
+        X = np.random.rand(5, 2)
+        epsilon = 1e-6
+
+        # testing warping indices
+        warping_ind_1 = [0, 1, 2]
+        warping_ind_2 = [-1, 1, 2]
+        warping_ind_3 = [0, 1.5, 2]
+        self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, warping_ind_1)
+        self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, warping_ind_2)
+        self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, warping_ind_3)
+
+        # testing Xmin and Xmax
+        Xmin_1, Xmax_1 = None, [1, 1]
+        Xmin_2, Xmax_2 = [0, 0], None
+        Xmin_3, Xmax_3 = [0, 0, 0], [1, 1]
+        self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, [0, 1], epsilon, Xmin_1, Xmax_1)
+        self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, [0, 1], epsilon, Xmin_2, Xmax_2)
+        self.failUnlessRaises(ValueError, GPy.util.input_warping_functions.KumarWarping, X, [0, 1], epsilon, Xmin_3, Xmax_3)
+
     def test_warped_gp_identity(self):
         """
         A WarpedGP with the identity warping function should be
GPy/testing/quadrature_tests.py (new file, 39 lines)
@@ -0,0 +1,39 @@
from __future__ import print_function, division
import numpy as np
import GPy
import warnings
from ..util.quad_integrate import quadgk_int, quadvgk


class QuadTests(np.testing.TestCase):
    """
    Test file for checking the implementation of Gauss-Kronrod quadrature.
    We take a function which can be integrated analytically and check whether the quadgk result matches.
    Through this file we can test how numerically accurate the quadrature implementation is.
    """
    def setUp(self):
        pass

    def test_infinite_quad(self):
        def f(x):
            return np.exp(-0.5*x**2)*np.power(x, np.arange(3)[:, None])
        quad_int_val = quadgk_int(f)
        real_val = np.sqrt(np.pi * 2)
        np.testing.assert_almost_equal(real_val, quad_int_val[0], decimal=7)

    def test_finite_quad(self):
        def f2(x):
            return x**2
        quad_int_val = quadvgk(f2, 1., 2.)
        real_val = 7/3.
        np.testing.assert_almost_equal(real_val, quad_int_val, decimal=5)


if __name__ == '__main__':
    def f(x):
        return np.exp(-0.5 * x ** 2) * np.power(x, np.arange(3)[:, None])

    quad_int_val = quadgk_int(f)
    real_val = np.sqrt(np.pi*2)
    np.testing.assert_almost_equal(real_val, quad_int_val[0], decimal=7)
    print(quadgk_int(f))
GPy/testing/serialization_tests.py (new file, 202 lines)
@@ -0,0 +1,202 @@
'''
Created on 20 April 2017

@author: pgmoren
'''
import unittest, itertools
#import cPickle as pickle
import pickle
import numpy as np
import tempfile
import GPy
from nose import SkipTest

fixed_seed = 11


class Test(unittest.TestCase):
    def test_serialize_deserialize_kernels(self):
        k1 = GPy.kern.RBF(2, variance=1.0, lengthscale=[1.0,1.0], ARD=True)
        k2 = GPy.kern.RatQuad(2, variance=2.0, lengthscale=1.0, power=2.0, active_dims=[0,1])
        k3 = GPy.kern.Bias(2, variance=2.0, active_dims=[1,0])
        k4 = GPy.kern.StdPeriodic(2, variance=2.0, lengthscale=1.0, period=1.0, active_dims=[1,1])
        k5 = GPy.kern.Linear(2, variances=[2.0, 1.0], ARD=True, active_dims=[1,1])
        k6 = GPy.kern.Exponential(2, variance=1., lengthscale=2)
        k7 = GPy.kern.Matern32(2, variance=1.0, lengthscale=[1.0,3.0], ARD=True, active_dims=[1,1])
        k8 = GPy.kern.Matern52(2, variance=2.0, lengthscale=[2.0,1.0], ARD=True, active_dims=[1,0])
        k9 = GPy.kern.ExpQuad(2, variance=3.0, lengthscale=[1.0,2.0], ARD=True, active_dims=[0,1])
        k10 = k1 + k1.copy() + k2 + k3 + k4 + k5 + k6
        k11 = k1 * k2 * k2.copy() * k3 * k4 * k5
        k12 = (k1 + k2) * (k3 + k4 + k5)
        k13 = ((k1 + k2) * k3) + k4 + k5 * k7
        k14 = ((k1 + k2) * k3) + k4 * k5 + k8
        k15 = ((k1 * k2) * k3) + k4 * k5 + k8 + k9

        k_list = [k1,k2,k3,k4,k5,k6,k7,k8,k9,k10,k11,k12,k13,k14,k15]

        for kk in k_list:
            kk_dict = kk.to_dict()
            kk_r = GPy.kern.Kern.from_dict(kk_dict)
            assert type(kk) == type(kk_r)
            np.testing.assert_array_equal(kk[:], kk_r[:])
            np.testing.assert_array_equal(np.array(kk.active_dims), np.array(kk_r.active_dims))

    def test_serialize_deserialize_mappings(self):
        m1 = GPy.mappings.Identity(3,2)
        m2 = GPy.mappings.Constant(3,2,1)
        m2_r = GPy.core.mapping.Mapping.from_dict(m2.to_dict())
        np.testing.assert_array_equal(m2.C.values[:], m2_r.C.values[:])
        m3 = GPy.mappings.Linear(3,2)
        m3_r = GPy.core.mapping.Mapping.from_dict(m3.to_dict())
        assert np.all(m3.A == m3_r.A)

        m_list = [m1, m2, m3]
        for mm in m_list:
            mm_dict = mm.to_dict()
            mm_r = GPy.core.mapping.Mapping.from_dict(mm_dict)
            assert type(mm) == type(mm_r)
            assert type(mm.input_dim) == type(mm_r.input_dim)
            assert type(mm.output_dim) == type(mm_r.output_dim)

    def test_serialize_deserialize_likelihoods(self):
        l1 = GPy.likelihoods.Gaussian(GPy.likelihoods.link_functions.Identity(), variance=3.0)
        l1_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l1.to_dict())
        l2 = GPy.likelihoods.Bernoulli(GPy.likelihoods.link_functions.Probit())
        l2_r = GPy.likelihoods.likelihood.Likelihood.from_dict(l2.to_dict())
        assert type(l1) == type(l1_r)
        assert np.all(l1.variance == l1_r.variance)
        assert type(l2) == type(l2_r)

    def test_serialize_deserialize_normalizers(self):
        n1 = GPy.util.normalizer.Standardize()
        n1.scale_by(np.random.rand(10))
        n1_r = GPy.util.normalizer._Norm.from_dict(n1.to_dict())
        assert type(n1) == type(n1_r)
        assert np.all(n1.mean == n1_r.mean)
        assert np.all(n1.std == n1_r.std)

    def test_serialize_deserialize_link_functions(self):
        l1 = GPy.likelihoods.link_functions.Identity()
        l2 = GPy.likelihoods.link_functions.Probit()
        l_list = [l1, l2]
        for ll in l_list:
            ll_dict = ll.to_dict()
            ll_r = GPy.likelihoods.link_functions.GPTransformation.from_dict(ll_dict)
            assert type(ll) == type(ll_r)

    def test_serialize_deserialize_inference_methods(self):
        e1 = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested")
        e1.ga_approx_old = GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10), np.random.rand(10))
        e1._ep_approximation = []
        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.posteriorParams(np.random.rand(10), np.random.rand(100).reshape((10,10))))
        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.gaussianApproximation(np.random.rand(10), np.random.rand(10)))
        e1._ep_approximation.append(GPy.inference.latent_function_inference.expectation_propagation.cavityParams(10))
        e1._ep_approximation[-1].v = np.random.rand(10)
        e1._ep_approximation[-1].tau = np.random.rand(10)
        e1._ep_approximation.append(np.random.rand(10))
        e1_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e1.to_dict())

        assert type(e1) == type(e1_r)
        assert e1.epsilon == e1_r.epsilon
        assert e1.eta == e1_r.eta
        assert e1.delta == e1_r.delta
        assert e1.always_reset == e1_r.always_reset
        assert e1.max_iters == e1_r.max_iters
        assert e1.ep_mode == e1_r.ep_mode
        assert e1.parallel_updates == e1_r.parallel_updates

        np.testing.assert_array_equal(e1.ga_approx_old.tau[:], e1_r.ga_approx_old.tau[:])
        np.testing.assert_array_equal(e1.ga_approx_old.v[:], e1_r.ga_approx_old.v[:])
        np.testing.assert_array_equal(e1._ep_approximation[0].mu[:], e1_r._ep_approximation[0].mu[:])
        np.testing.assert_array_equal(e1._ep_approximation[0].Sigma[:], e1_r._ep_approximation[0].Sigma[:])
        np.testing.assert_array_equal(e1._ep_approximation[1].tau[:], e1_r._ep_approximation[1].tau[:])
        np.testing.assert_array_equal(e1._ep_approximation[1].v[:], e1_r._ep_approximation[1].v[:])
        np.testing.assert_array_equal(e1._ep_approximation[2].tau[:], e1_r._ep_approximation[2].tau[:])
        np.testing.assert_array_equal(e1._ep_approximation[2].v[:], e1_r._ep_approximation[2].v[:])
        np.testing.assert_array_equal(e1._ep_approximation[3][:], e1_r._ep_approximation[3][:])

        e2 = GPy.inference.latent_function_inference.exact_gaussian_inference.ExactGaussianInference()
        e2_r = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(e2.to_dict())

        assert type(e2) == type(e2_r)

    def test_serialize_deserialize_model(self):
        np.random.seed(fixed_seed)
        N = 20
        Nhalf = int(N/2)
        X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
        Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
        kernel = GPy.kern.RBF(1)
        likelihood = GPy.likelihoods.Bernoulli()
        inference_method = GPy.inference.latent_function_inference.expectation_propagation.EP(ep_mode="nested")
        mean_function = None
        m = GPy.core.GP(X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=inference_method, mean_function=mean_function, normalizer=True, name='gp_classification')
        m.optimize()
        m.save_model("temp_test_gp_with_data.json", compress=True, save_data=True)
        m.save_model("temp_test_gp_without_data.json", compress=True, save_data=False)
        m1_r = GPy.core.GP.load_model("temp_test_gp_with_data.json.zip")
        m2_r = GPy.core.GP.load_model("temp_test_gp_without_data.json.zip", (X,Y))
        import os
        os.remove("temp_test_gp_with_data.json.zip")
        os.remove("temp_test_gp_without_data.json.zip")
        var = m.predict(X)[0]
        var1_r = m1_r.predict(X)[0]
        var2_r = m2_r.predict(X)[0]
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())

    def test_serialize_deserialize_inference_GPRegressor(self):
        np.random.seed(fixed_seed)
        N = 50
        N_new = 50
        D = 1
        X = np.random.uniform(-3., 3., (N, 1))
        Y = np.sin(X) + np.random.randn(N, D) * 0.05
        X_new = np.random.uniform(-3., 3., (N_new, 1))
        k = GPy.kern.RBF(input_dim=1, lengthscale=10)
        m = GPy.models.GPRegression(X, Y, k)
        m.optimize()
        m.save_model("temp_test_gp_regressor_with_data.json", compress=True, save_data=True)
        m.save_model("temp_test_gp_regressor_without_data.json", compress=True, save_data=False)
        m1_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_with_data.json.zip")
        m2_r = GPy.models.GPRegression.load_model("temp_test_gp_regressor_without_data.json.zip", (X,Y))
        import os
        os.remove("temp_test_gp_regressor_with_data.json.zip")
        os.remove("temp_test_gp_regressor_without_data.json.zip")

        Xp = np.random.uniform(size=(int(1e5),1))
        Xp[:,0] = Xp[:,0]*15-5

        _, var = m.predict(Xp)
        _, var1_r = m1_r.predict(Xp)
        _, var2_r = m2_r.predict(Xp)
        np.testing.assert_array_equal(var.flatten(), var1_r.flatten())
        np.testing.assert_array_equal(var.flatten(), var2_r.flatten())

    def test_serialize_deserialize_inference_GPClassifier(self):
        np.random.seed(fixed_seed)
        N = 50
        Nhalf = int(N/2)
        X = np.hstack([np.random.normal(5, 2, Nhalf), np.random.normal(10, 2, Nhalf)])[:, None]
        Y = np.hstack([np.ones(Nhalf), np.zeros(Nhalf)])[:, None]
        kernel = GPy.kern.RBF(1)
        m = GPy.models.GPClassification(X, Y, kernel=kernel)
        m.optimize()
        m.save_model("temp_test_gp_classifier_with_data.json", compress=True, save_data=True)
        m.save_model("temp_test_gp_classifier_without_data.json", compress=True, save_data=False)
        m1_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_with_data.json.zip")
        m2_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_without_data.json.zip", (X,Y))
        import os
        os.remove("temp_test_gp_classifier_with_data.json.zip")
        os.remove("temp_test_gp_classifier_without_data.json.zip")

        var = m.predict(X)[0]
        var1_r = m1_r.predict(X)[0]
        var2_r = m2_r.predict(X)[0]
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var1_r).flatten())
        np.testing.assert_array_equal(np.array(var).flatten(), np.array(var2_r).flatten())


if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_parameter_index_operations']
    unittest.main()
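Distilled from the model tests above, the save/load round trip looks like this (the temporary file name is arbitrary; save_model with compress=True appends .zip to it):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (50, 1))
Y = np.sin(X) + np.random.randn(50, 1) * 0.05
m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(1))
m.optimize()

m.save_model("gp.json", compress=True, save_data=False)            # writes gp.json.zip
m2 = GPy.models.GPRegression.load_model("gp.json.zip", (X, Y))     # data must be re-supplied
np.testing.assert_array_equal(m.predict(X)[0].flatten(), m2.predict(X)[0].flatten())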
@@ -17,3 +17,4 @@ from . import multioutput
 from . import parallel
 from . import functions
 from . import cluster_with_offset
+from . import input_warping_functions
GPy/util/input_warping_functions.py (new file, 262 lines)
@@ -0,0 +1,262 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from ..core.parameterization import Parameterized, Param
from ..core.parameterization.priors import LogGaussian


class InputWarpingFunction(Parameterized):
    """Abstract class for input warping functions"""

    def __init__(self, name):
        super(InputWarpingFunction, self).__init__(name=name)

    def f(self, X, test=False):
        raise NotImplementedError

    def fgrad_x(self, X):
        raise NotImplementedError

    def update_grads(self, X, dL_dW):
        raise NotImplementedError


class IdentifyWarping(InputWarpingFunction):
    """The identity warping function, for testing"""
    def __init__(self):
        super(IdentifyWarping, self).__init__(name='input_warp_identity')

    def f(self, X, test_data=False):
        return X

    def fgrad_X(self, X):
        return np.zeros(X.shape)

    def update_grads(self, X, dL_dW):
        pass


class InputWarpingTest(InputWarpingFunction):
    """A one-parameter linear warping function, for testing"""
    def __init__(self):
        super(InputWarpingTest, self).__init__(name='input_warp_test')
        self.a = Param('a', 1.0)
        self.set_prior(LogGaussian(0.0, 0.75))
        self.link_parameter(self.a)

    def f(self, X, test_data=False):
        return X * self.a

    def fgrad_X(self, X):
        return np.ones(X.shape) * self.a

    def update_grads(self, X, dL_dW):
        self.a.gradient[:] = np.sum(dL_dW * X)


class KumarWarping(InputWarpingFunction):
    """Kumar warping for input data

    Parameters
    ----------
    X : array_like, shape = (n_samples, n_features)
        The input data that is going to be warped

    warping_indices: list of int, optional
        The features that are going to be warped
        Defaults to warping all the features

    epsilon: float, optional
        Used to normalize input data to [0+e, 1-e]
        Defaults to 1e-6

    Xmin : list of float, optional
        The min values for each feature, defined by the user
        Defaults to the training minimum

    Xmax : list of float, optional
        The max values for each feature, defined by the user
        Defaults to the training maximum

    Attributes
    ----------
    warping_indices: list of int
        The features that are going to be warped
        Defaults to warping all the features

    warping_dim: int
        The number of features to be warped

    Xmin : list of float
        The min values for each feature, defined by the user
        Defaults to the training minimum

    Xmax : list of float
        The max values for each feature, defined by the user
        Defaults to the training maximum

    epsilon: float
        Used to normalize input data to [0+e, 1-e]
        Defaults to 1e-6

    X_normalized : array_like, shape = (n_samples, n_features)
        The normalized training X

    scaling : list of float, length = n_features in X
        Defined as 1.0 / (self.Xmax - self.Xmin)

    params : list of Param
        The list of all the parameters used in Kumar warping

    num_parameters: int
        The number of parameters used in Kumar warping
    """

    def __init__(self, X, warping_indices=None, epsilon=None, Xmin=None, Xmax=None):
        super(KumarWarping, self).__init__(name='input_warp_kumar')

        if warping_indices is not None and np.max(warping_indices) > X.shape[1] - 1:
            raise ValueError("Kumar warping indices exceed feature dimension")

        if warping_indices is not None and np.min(warping_indices) < 0:
            raise ValueError("Kumar warping indices should not be negative")

        if warping_indices is not None and np.any(list(map(lambda x: not isinstance(x, int), warping_indices))):
            raise ValueError("Kumar warping indices should be integers")

        if Xmin is None and Xmax is None:
            Xmin = X.min(axis=0)
            Xmax = X.max(axis=0)
        else:
            if Xmin is None or Xmax is None:
                raise ValueError("Xmin and Xmax need to be provided at the same time!")
            if len(Xmin) != X.shape[1] or len(Xmax) != X.shape[1]:
                raise ValueError("Xmin and Xmax should have n_feature values!")

        if epsilon is None:
            epsilon = 1e-6
        self.epsilon = epsilon

        self.Xmin = Xmin - self.epsilon
        self.Xmax = Xmax + self.epsilon
        self.scaling = 1.0 / (self.Xmax - self.Xmin)
        self.X_normalized = (X - self.Xmin) / (self.Xmax - self.Xmin)

        if warping_indices is None:
            warping_indices = range(X.shape[1])

        self.warping_indices = warping_indices
        self.warping_dim = len(self.warping_indices)
        self.num_parameters = 2 * self.warping_dim

        # create parameters
        self.params = [[Param('a%d' % i, 1.0), Param('b%d' % i, 1.0)] for i in range(self.warping_dim)]

        # add constraints
        for i in range(self.warping_dim):
            self.params[i][0].constrain_bounded(0.0, 10.0)
            self.params[i][1].constrain_bounded(0.0, 10.0)

        # set priors and add the parameters into the handler
        for i in range(self.warping_dim):
            self.params[i][0].set_prior(LogGaussian(0.0, 0.75))
            self.params[i][1].set_prior(LogGaussian(0.0, 0.75))
            self.link_parameter(self.params[i][0])
            self.link_parameter(self.params[i][1])

    def f(self, X, test_data=False):
        """Apply the warping function to some input data

        Parameters
        ----------
        X : array_like, shape = (n_samples, n_features)

        test_data: bool, optional
            Defaults to False; should be set to True when transforming test data

        Returns
        -------
        X_warped : array_like, shape = (n_samples, n_features)
            The warped input data

        Math
        ----
        f(x) = 1 - (1 - x^a)^b
        """
        X_warped = X.copy()
        if test_data:
            X_normalized = (X - self.Xmin) / (self.Xmax - self.Xmin)
        else:
            X_normalized = self.X_normalized

        for i_seq, i_fea in enumerate(self.warping_indices):
            a, b = self.params[i_seq][0], self.params[i_seq][1]
            X_warped[:, i_fea] = 1 - np.power(1 - np.power(X_normalized[:, i_fea], a), b)
        return X_warped

    def fgrad_X(self, X):
        """Compute the gradient of the warping function with respect to X

        Parameters
        ----------
        X : array_like, shape = (n_samples, n_features)
            The locations at which to compute the gradient

        Returns
        -------
        grad : array_like, shape = (n_samples, n_features)
            The gradient at every location in X

        Math
        ----
        grad = a * b * x^(a-1) * (1 - x^a)^(b-1)
        """
        grad = np.zeros(X.shape)
        for i_seq, i_fea in enumerate(self.warping_indices):
            a, b = self.params[i_seq][0], self.params[i_seq][1]
            grad[:, i_fea] = a * b * np.power(self.X_normalized[:, i_fea], a-1) * \
                np.power(1 - np.power(self.X_normalized[:, i_fea], a), b-1) * self.scaling[i_fea]
        return grad

    def update_grads(self, X, dL_dW):
        """Update the gradients of the marginal log likelihood with respect to the parameters of the warping function

        Parameters
        ----------
        X : array_like, shape = (n_samples, n_features)
            The input BEFORE warping

        dL_dW : array_like, shape = (n_samples, n_features)
            The gradient of the marginal log likelihood with respect to the warped input

        Math
        ----
        Let w = f(x) be the input after warping; then
        dW_da = b * (1 - x^a)^(b - 1) * x^a * ln(x)
        dW_db = - (1 - x^a)^b * ln(1 - x^a)
        dL_da = dL_dW * dW_da
        dL_db = dL_dW * dW_db
        """
        for i_seq, i_fea in enumerate(self.warping_indices):
            ai, bi = self.params[i_seq][0], self.params[i_seq][1]

            # cache a value to save some computation
            x_pow_a = np.power(self.X_normalized[:, i_fea], ai)

            # compute the gradients for ai, bi over all of X
            dz_dai = bi * np.power(1 - x_pow_a, bi-1) * x_pow_a * np.log(self.X_normalized[:, i_fea])
            dz_dbi = - np.power(1 - x_pow_a, bi) * np.log(1 - x_pow_a)

            # sum the gradients over all the data
            dL_dai = np.sum(dL_dW[:, i_fea] * dz_dai)
            dL_dbi = np.sum(dL_dW[:, i_fea] * dz_dbi)
            self.params[i_seq][0].gradient[:] = dL_dai
            self.params[i_seq][1].gradient[:] = dL_dbi
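A quick numeric sanity check of the formulas in the docstrings above (plain numpy, independent of GPy; x, a, b are arbitrary values in the valid ranges):

import numpy as np

def kumar(x, a, b):
    return 1 - (1 - x**a)**b          # f(x) = 1 - (1 - x^a)^b

x, a, b, eps = 0.3, 1.5, 2.0, 1e-7
dW_da = b * (1 - x**a)**(b - 1) * x**a * np.log(x)
dW_db = -((1 - x**a)**b) * np.log(1 - x**a)

# compare against one-sided finite differences
assert abs(dW_da - (kumar(x, a + eps, b) - kumar(x, a, b)) / eps) < 1e-5
assert abs(dW_db - (kumar(x, a, b + eps) - kumar(x, a, b)) / eps) < 1e-5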
@@ -33,6 +33,27 @@ class _Norm(object):
         """
         raise NotImplementedError

+    def to_dict(self):
+        raise NotImplementedError
+
+    def _to_dict(self):
+        input_dict = {}
+        return input_dict
+
+    @staticmethod
+    def from_dict(input_dict):
+        import copy
+        input_dict = copy.deepcopy(input_dict)
+        normalizer_class = input_dict.pop('class')
+        import GPy
+        normalizer_class = eval(normalizer_class)
+        return normalizer_class._from_dict(normalizer_class, input_dict)
+
+    @staticmethod
+    def _from_dict(normalizer_class, input_dict):
+        return normalizer_class(**input_dict)
+

 class Standardize(_Norm):
     def __init__(self):
         self.mean = None
@@ -50,6 +71,23 @@ class Standardize(_Norm):
     def scaled(self):
         return self.mean is not None

+    def to_dict(self):
+        input_dict = super(Standardize, self)._to_dict()
+        input_dict["class"] = "GPy.util.normalizer.Standardize"
+        if self.mean is not None:
+            input_dict["mean"] = self.mean.tolist()
+            input_dict["std"] = self.std.tolist()
+        return input_dict
+
+    @staticmethod
+    def _from_dict(kernel_class, input_dict):
+        s = Standardize()
+        if "mean" in input_dict:
+            s.mean = np.array(input_dict["mean"])
+        if "std" in input_dict:
+            s.std = np.array(input_dict["std"])
+        return s
+
 # Inverse variance to be implemented, disabling for now
 # If someone in the future wants to implement this,
 # we need to implement the inverse variance for
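The serialization tests earlier in this diff exercise these hooks; the round trip in isolation:

import numpy as np
import GPy

n = GPy.util.normalizer.Standardize()
n.scale_by(np.random.rand(10))                        # fits mean and std
n2 = GPy.util.normalizer._Norm.from_dict(n.to_dict())
assert np.all(n.mean == n2.mean) and np.all(n.std == n2.std)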
GPy/util/quad_integrate.py (new file, 119 lines)
@@ -0,0 +1,119 @@
"""
The file for utilities related to integration by quadrature methods
- contains an implementation of Gauss-Kronrod integration.
"""
import numpy as np

def getSubs(Subs, XK, NK=1):
    M = (Subs[1, :] - Subs[0, :]) / 2
    C = (Subs[1, :] + Subs[0, :]) / 2
    I = XK[:, None] * M + np.ones((NK, 1)) * C
    # A = [Subs(1,:); I]
    A = np.vstack((Subs[0, :], I))
    # B = [I; Subs(2,:)]
    B = np.vstack((I, Subs[1, :]))
    # Subs = [reshape(A, 1, []); reshape(B, 1, [])]
    A = A.flatten()
    B = B.flatten()
    Subs = np.vstack((A, B))
    return Subs

def quadvgk(feval, fmin, fmax, tol1=1e-5, tol2=1e-5):
    """
    The numpy implementation makes use of the code at http://se.mathworks.com/matlabcentral/fileexchange/18801-quadvgk
    We use the Gauss-Kronrod integration already used in GPstuff for evaluating one-dimensional integrals.
    This is vectorised quadrature, which means that several functions can be evaluated at the same time over a grid
    of points.
    :param feval: vector-valued integrand
    :param fmin: lower integration limit
    :param fmax: upper integration limit
    :param tol1: tolerance on the Gauss/Kronrod difference
    :param tol2: minimum subinterval width
    :return: the integral of each component of feval
    """
    # 15-point Kronrod abscissae and weightings
    XK = np.array([-0.991455371120813, -0.949107912342759, -0.864864423359769, -0.741531185599394,
                   -0.586087235467691, -0.405845151377397, -0.207784955007898, 0.,
                   0.207784955007898, 0.405845151377397, 0.586087235467691,
                   0.741531185599394, 0.864864423359769, 0.949107912342759, 0.991455371120813])
    WK = np.array([0.022935322010529, 0.063092092629979, 0.104790010322250, 0.140653259715525,
                   0.169004726639267, 0.190350578064785, 0.204432940075298, 0.209482141084728,
                   0.204432940075298, 0.190350578064785, 0.169004726639267,
                   0.140653259715525, 0.104790010322250, 0.063092092629979, 0.022935322010529])
    # 7-point Gaussian weightings
    WG = np.array([0.129484966168870, 0.279705391489277, 0.381830050505119, 0.417959183673469,
                   0.381830050505119, 0.279705391489277, 0.129484966168870])

    NK = WK.size
    G = np.arange(2, NK, 2)
    tol1 = 1e-4   # note: overrides the arguments above
    tol2 = 1e-4
    Subs = np.array([[fmin], [fmax]])
    # number of functions to evaluate in the feval vector of functions
    NF = feval(np.zeros(1)).size
    Q = np.zeros(NF)
    neval = 0
    while Subs.size > 0:
        Subs = getSubs(Subs, XK)
        M = (Subs[1,:] - Subs[0,:]) / 2
        C = (Subs[1,:] + Subs[0,:]) / 2
        NM = M.size
        # x = reshape(XK * M + ones(NK, 1) * C, 1, [])
        x = XK[:,None]*M + C
        x = x.flatten()
        FV = feval(x)
        Q1 = np.zeros((NF, NM))
        Q2 = np.zeros((NF, NM))

        # vectorised version of the MATLAB loop:
        #   for n=1:NF
        #       F = reshape(FV(n,:), NK, []);
        #       Q1(n,:) = M .* sum((WK * ones(1, NM)) .* F);
        #       Q2(n,:) = M .* sum((WG * ones(1, NM)) .* F(G,:));
        #   end
        Q1 = np.dot(FV.reshape(NF, NK, NM).swapaxes(2,1), WK) * M
        Q2 = np.dot(FV.reshape(NF, NK, NM).swapaxes(2,1)[:,:,1::2], WG) * M
        # accept subintervals where the Gauss and Kronrod estimates agree,
        # or which have already become very narrow
        ind = np.nonzero(np.logical_or(np.max(np.abs(Q1 - Q2), 0) < tol1, (Subs[1,:] - Subs[0,:]) < tol2))[0]
        Q = Q + np.sum(Q1[:, ind], axis=1)
        Subs = np.delete(Subs, ind, axis=1)
    return Q

def quadgk_int(f, fmin=-np.inf, fmax=np.inf, difftol=0.1):
    """
    Integrate f from fmin to fmax,
    doing the integration by the substitution
        x = r / (1 - r**2)
    As r goes from -1 to 1, x goes from -inf to inf.
    The interval for the quadvgk function is (-1, 1), so we transform the space from (-inf, inf) to (-1, 1).
    :param f: integrand
    :param fmin: lower limit
    :param fmax: upper limit
    :param difftol: tolerance passed on to quadvgk
    :return: the integral of each component of f
    """
    difftol = 1e-4
    def trans_func(r):
        r2 = np.square(r)
        x = r / (1 - r2)
        dx_dr = (1 + r2) / (1 - r2)**2
        return f(x) * dx_dr

    integrand = quadvgk(trans_func, -1., 1., difftol, difftol)
    return integrand
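To see the substitution at work: with x = r/(1-r^2) we get dx/dr = (1+r^2)/(1-r^2)^2, so the integral of f over (-inf, inf) becomes the integral of f(r/(1-r^2)) * (1+r^2)/(1-r^2)^2 over (-1, 1), which quadvgk can handle. A self-contained check against the Gaussian integral, mirroring the quadrature tests above:

import numpy as np
from GPy.util.quad_integrate import quadgk_int

def gauss(x):
    return np.exp(-0.5 * x**2)

val = quadgk_int(gauss)                      # integrates over (-inf, inf) via the substitution
np.testing.assert_almost_equal(val[0], np.sqrt(2 * np.pi), decimal=7)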
@@ -3,7 +3,7 @@ environment:
     secure: 8/ZjXFwtd1S7ixd7PJOpptupKKEDhm2da/q3unabJ00=
   COVERALLS_REPO_TOKEN:
     secure: d3Luic/ESkGaWnZrvWZTKrzO+xaVwJWaRCEP0F+K/9DQGPSRZsJ/Du5g3s4XF+tS
-  gpy_version: 1.7.7
+  gpy_version: 1.8.0
 matrix:
   - PYTHON_VERSION: 2.7
     MINICONDA: C:\Miniconda-x64
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.7.7
+current_version = 1.8.0
 tag = True
 commit = True