mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
Merge pull request #426 from SheffieldML/devel
some fixes from issues and beckdaniels warped gp improvements
This commit is contained in:
commit
2bb8161937
22 changed files with 722 additions and 310 deletions
|
|
@ -14,6 +14,8 @@ from . import testing
|
|||
from . import kern
|
||||
from . import plotting
|
||||
|
||||
from .util import normalizer
|
||||
|
||||
# backwards compatibility
|
||||
import sys
|
||||
backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_remappings', 'index_operations']
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
__version__ = "1.2.1"
|
||||
__version__ = "1.4.0"
|
||||
|
|
|
|||
|
|
@ -2,17 +2,17 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from .. import kern
|
||||
from GPy.core.model import Model
|
||||
from paramz import ObsAr
|
||||
from .model import Model
|
||||
from .parameterization.variational import VariationalPosterior
|
||||
from .mapping import Mapping
|
||||
from .. import likelihoods
|
||||
from .. import kern
|
||||
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
|
||||
from GPy.core.parameterization.variational import VariationalPosterior
|
||||
from ..util.normalizer import Standardize
|
||||
from paramz import ObsAr
|
||||
|
||||
import logging
|
||||
import warnings
|
||||
from GPy.util.normalizer import MeanNorm
|
||||
logger = logging.getLogger("GP")
|
||||
|
||||
class GP(Model):
|
||||
|
|
@ -28,7 +28,7 @@ class GP(Model):
|
|||
:param Norm normalizer:
|
||||
normalize the outputs Y.
|
||||
Prediction will be un-normalized using this normalizer.
|
||||
If normalizer is None, we will normalize using MeanNorm.
|
||||
If normalizer is None, we will normalize using Standardize.
|
||||
If normalizer is False, no normalization will be done.
|
||||
|
||||
.. Note:: Multiple independent outputs are allowed using columns of Y
|
||||
|
|
@ -49,7 +49,7 @@ class GP(Model):
|
|||
logger.info("initializing Y")
|
||||
|
||||
if normalizer is True:
|
||||
self.normalizer = MeanNorm()
|
||||
self.normalizer = Standardize()
|
||||
elif normalizer is False:
|
||||
self.normalizer = None
|
||||
else:
|
||||
|
|
@ -64,6 +64,7 @@ class GP(Model):
|
|||
self.Y_normalized = self.Y
|
||||
else:
|
||||
self.Y = Y
|
||||
self.Y_normalized = self.Y
|
||||
|
||||
if Y.shape[0] != self.num_data:
|
||||
#There can be cases where we want inputs than outputs, for example if we have multiple latent
|
||||
|
|
@ -248,14 +249,16 @@ class GP(Model):
|
|||
"""
|
||||
#predict the latent function values
|
||||
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
|
||||
if self.normalizer is not None:
|
||||
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
|
||||
|
||||
if include_likelihood:
|
||||
# now push through likelihood
|
||||
if likelihood is None:
|
||||
likelihood = self.likelihood
|
||||
mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
|
||||
|
||||
if self.normalizer is not None:
|
||||
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
|
||||
|
||||
return mu, var
|
||||
|
||||
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
|
||||
|
|
@ -300,11 +303,14 @@ class GP(Model):
|
|||
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
|
||||
"""
|
||||
m, v = self._raw_predict(X, full_cov=False, kern=kern)
|
||||
if self.normalizer is not None:
|
||||
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
|
||||
if likelihood is None:
|
||||
likelihood = self.likelihood
|
||||
return likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
|
||||
|
||||
quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
|
||||
|
||||
if self.normalizer is not None:
|
||||
quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
|
||||
return quantiles
|
||||
|
||||
def predictive_gradients(self, Xnew, kern=None):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ class Priorizable(Parameterizable):
|
|||
|
||||
def __setstate__(self, state):
|
||||
super(Priorizable, self).__setstate__(state)
|
||||
self._index_operations['priors'] = self.priors
|
||||
#self._index_operations['priors'] = self.priors
|
||||
|
||||
|
||||
#===========================================================================
|
||||
|
|
|
|||
|
|
@ -550,3 +550,34 @@ def parametric_mean_function(max_iters=100, optimize=True, plot=True):
|
|||
return m
|
||||
|
||||
|
||||
def warped_gp_cubic_sine(max_iters=100):
|
||||
"""
|
||||
A test replicating the cubic sine regression problem from
|
||||
Snelson's paper.
|
||||
"""
|
||||
X = (2 * np.pi) * np.random.random(151) - np.pi
|
||||
Y = np.sin(X) + np.random.normal(0,0.2,151)
|
||||
Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y])
|
||||
X = X[:, None]
|
||||
Y = Y[:, None]
|
||||
|
||||
warp_k = GPy.kern.RBF(1)
|
||||
warp_f = GPy.util.warping_functions.TanhFunction(n_terms=2)
|
||||
warp_m = GPy.models.WarpedGP(X, Y, kernel=warp_k, warping_function=warp_f)
|
||||
warp_m['.*\.d'].constrain_fixed(1.0)
|
||||
m = GPy.models.GPRegression(X, Y)
|
||||
m.optimize_restarts(parallel=False, robust=True, num_restarts=5, max_iters=max_iters)
|
||||
warp_m.optimize_restarts(parallel=False, robust=True, num_restarts=5, max_iters=max_iters)
|
||||
#m.optimize(max_iters=max_iters)
|
||||
#warp_m.optimize(max_iters=max_iters)
|
||||
|
||||
print(warp_m)
|
||||
print(warp_m['.*warp.*'])
|
||||
|
||||
warp_m.predict_in_warped_space = False
|
||||
warp_m.plot(title="Warped GP - Latent space")
|
||||
warp_m.predict_in_warped_space = True
|
||||
warp_m.plot(title="Warped GP - Warped space")
|
||||
m.plot(title="Standard GP")
|
||||
warp_m.plot_warping()
|
||||
pb.show()
|
||||
|
|
|
|||
|
|
@ -13,15 +13,21 @@ class Add(CombinationKernel):
|
|||
propagates gradients through.
|
||||
|
||||
This kernel will take over the active dims of it's subkernels passed in.
|
||||
|
||||
NOTE: The subkernels will be copies of the original kernels, to prevent
|
||||
unexpected behavior.
|
||||
"""
|
||||
def __init__(self, subkerns, name='sum'):
|
||||
for i, kern in enumerate(subkerns[:]):
|
||||
_newkerns = []
|
||||
for kern in subkerns:
|
||||
if isinstance(kern, Add):
|
||||
del subkerns[i]
|
||||
for part in kern.parts[::-1]:
|
||||
for part in kern.parts:
|
||||
#kern.unlink_parameter(part)
|
||||
subkerns.insert(i, part.copy())
|
||||
super(Add, self).__init__(subkerns, name)
|
||||
_newkerns.append(part.copy())
|
||||
else:
|
||||
_newkerns.append(kern.copy())
|
||||
|
||||
super(Add, self).__init__(_newkerns, name)
|
||||
self._exact_psicomp = self._check_exact_psicomp()
|
||||
|
||||
def _check_exact_psicomp(self):
|
||||
|
|
|
|||
|
|
@ -135,9 +135,7 @@ class Bias(Static):
|
|||
|
||||
def K(self, X, X2=None):
|
||||
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
|
||||
ret = np.empty(shape, dtype=np.float64)
|
||||
ret[:] = self.variance
|
||||
return ret
|
||||
return np.full(shape, self.variance, dtype=np.float64)
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
self.variance.gradient = dL_dK.sum()
|
||||
|
|
@ -146,9 +144,7 @@ class Bias(Static):
|
|||
self.variance.gradient = dL_dKdiag.sum()
|
||||
|
||||
def psi2(self, Z, variational_posterior):
|
||||
ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
|
||||
return ret
|
||||
return np.full((Z.shape[0], Z.shape[0]), self.variance*self.variance*variational_posterior.shape[0], dtype=np.float64)
|
||||
|
||||
def psi2n(self, Z, variational_posterior):
|
||||
ret = np.empty((variational_posterior.mean.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||
|
|
|
|||
|
|
@ -26,3 +26,5 @@ from .dpgplvm import DPBayesianGPLVM
|
|||
from .state_space_model import StateSpace
|
||||
|
||||
from .ibp_lfm import IBPLFM
|
||||
|
||||
from .gp_offset_regression import GPOffsetRegression
|
||||
|
|
|
|||
95
GPy/models/gp_offset_regression.py
Normal file
95
GPy/models/gp_offset_regression.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
# Written by Mike Smith. michaeltsmith.org.uk
|
||||
|
||||
import numpy as np
|
||||
from ..core import GP
|
||||
from .. import likelihoods
|
||||
from .. import kern
|
||||
from ..core import Param
|
||||
|
||||
class GPOffsetRegression(GP):
|
||||
"""
|
||||
Gaussian Process model for offset regression
|
||||
|
||||
:param X: input observations, we assume for this class that this has one dimension of actual inputs and the last dimension should be the index of the cluster (so X should be Nx2)
|
||||
:param Y: observed values (Nx1?)
|
||||
:param kernel: a GPy kernel, defaults to rbf
|
||||
:param Norm normalizer: [False]
|
||||
:param noise_var: the noise variance for Gaussian likelhood, defaults to 1.
|
||||
|
||||
Normalize Y with the norm given.
|
||||
If normalizer is False, no normalization will be done
|
||||
If it is None, we use GaussianNorm(alization)
|
||||
|
||||
.. Note:: Multiple independent outputs are allowed using columns of Y
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1., mean_function=None):
|
||||
|
||||
assert X.shape[1]>1, "Need at least two input dimensions, as last dimension is the label of the cluster"
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(X.shape[1]-1)
|
||||
|
||||
#self._log_marginal_likelihood = np.nan #todo
|
||||
|
||||
likelihood = likelihoods.Gaussian(variance=noise_var)
|
||||
self.X_fixed = X[:,:-1]
|
||||
self.selected = np.array([int(x) for x in X[:,-1]])
|
||||
|
||||
|
||||
super(GPOffsetRegression, self).__init__(X, Y, kernel, likelihood, name='GP offset regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)
|
||||
maxcluster = np.max(self.selected)
|
||||
self.offset = Param('offset', np.zeros(maxcluster))
|
||||
#self.offset.set_prior(...)
|
||||
self.link_parameter(self.offset)
|
||||
|
||||
#def dr_doffset(self, X, sel): #how much r changes wrt the offset hyperparameters
|
||||
|
||||
#def dL_doffset(self, X, sel):
|
||||
# dL_dr = self.dK_dr_via_X(X, X) * dL_dK
|
||||
|
||||
|
||||
def dr_doffset(self,X,sel,delta):
|
||||
#given an input matrix, X and the offsets (delta)
|
||||
#finds dr/dDelta
|
||||
#returns them as a list, one for each offset (delta).
|
||||
#get the input values
|
||||
|
||||
#a matrix G represents the effect of increasing the offset on the radius passed to the kernel for each input. For example
|
||||
#what effect will increasing offset 4 have on the kernel output of inputs 5 and 8? Answer: Gs[4][5,8]... (positive or negative)
|
||||
Gs = []
|
||||
for i,d in enumerate(delta):
|
||||
#X[sel==(i+1)]-=d
|
||||
G = np.repeat(np.array(sel==(i+1))[:,None]*1,len(X),axis=1) - np.repeat(np.array(sel==(i+1))[None,:]*1,len(X),axis=0)
|
||||
Gs.append(G)
|
||||
#does subtracting the two Xs end up positive or negative (if negative we need to flip the sign in G).
|
||||
w = np.repeat(X,len(X),axis=1) - np.repeat(X.T,len(X),axis=0)
|
||||
dr_doffsets = []
|
||||
for i,d in enumerate(delta):
|
||||
dr_doffset = np.sign(w * Gs[i])
|
||||
#print "dr_doffset %d" % i
|
||||
#print dr_doffset
|
||||
#print Gs[i]
|
||||
#print w
|
||||
dr_doffsets.append(dr_doffset)
|
||||
|
||||
#lastly we need to divide by the lengthscale: So far we've found d(X_i - X_j)/dOffsets
|
||||
#we want dr/dOffsets. (X_i - X_j)/lengthscale = r
|
||||
dr_doffsets /= self.kern.lengthscale
|
||||
return dr_doffsets
|
||||
|
||||
def parameters_changed(self):
|
||||
offsets = np.hstack([0.0,self.offset.values])[:,None]
|
||||
|
||||
self.X = self.X_fixed - offsets[self.selected]
|
||||
super(GPOffsetRegression, self).parameters_changed()
|
||||
|
||||
dL_dr = self.kern.dK_dr_via_X(self.X, self.X) * self.grad_dict['dL_dK']
|
||||
|
||||
dr_doff = self.dr_doffset(self.X,self.selected,self.offset.values)
|
||||
for i in range(len(dr_doff)):
|
||||
dL_doff = dL_dr * dr_doff[i]
|
||||
self.offset.gradient[i] = -np.sum(dL_doff)
|
||||
|
||||
|
|
@ -2,65 +2,56 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..util.warping_functions import *
|
||||
#from ..util.warping_functions import *
|
||||
from ..core import GP
|
||||
from .. import likelihoods
|
||||
from GPy.util.warping_functions import TanhWarpingFunction_d
|
||||
from paramz import ObsAr
|
||||
#from GPy.util.warping_functions import TanhFunction
|
||||
from ..util.warping_functions import TanhFunction
|
||||
from GPy import kern
|
||||
|
||||
class WarpedGP(GP):
|
||||
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
|
||||
|
||||
"""
|
||||
This defines a GP Regression model that applies a
|
||||
warping function to the output.
|
||||
"""
|
||||
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalizer=False):
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(X.shape[1])
|
||||
|
||||
if warping_function == None:
|
||||
self.warping_function = TanhWarpingFunction_d(warping_terms)
|
||||
self.warping_function = TanhFunction(warping_terms)
|
||||
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1) * 1)
|
||||
else:
|
||||
self.warping_function = warping_function
|
||||
|
||||
self.scale_data = False
|
||||
if self.scale_data:
|
||||
Y = self._scale_data(Y)
|
||||
#self.has_uncertain_inputs = False
|
||||
self.Y_untransformed = Y.copy()
|
||||
self.predict_in_warped_space = True
|
||||
likelihood = likelihoods.Gaussian()
|
||||
|
||||
GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
|
||||
super(WarpedGP, self).__init__(X, Y.copy(), likelihood=likelihood, kernel=kernel, normalizer=normalizer)
|
||||
self.Y_normalized = self.Y_normalized.copy()
|
||||
self.Y_untransformed = self.Y_normalized.copy()
|
||||
self.predict_in_warped_space = True
|
||||
self.link_parameter(self.warping_function)
|
||||
|
||||
def _scale_data(self, Y):
|
||||
self._Ymax = Y.max()
|
||||
self._Ymin = Y.min()
|
||||
return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
|
||||
|
||||
def _unscale_data(self, Y):
|
||||
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
|
||||
def set_XY(self, X=None, Y=None):
|
||||
super(WarpedGP, self).set_XY(X, Y)
|
||||
self.Y_untransformed = self.Y_normalized.copy()
|
||||
self.update_model(True)
|
||||
|
||||
def parameters_changed(self):
|
||||
self.Y[:] = self.transform_data()
|
||||
"""
|
||||
Notice that we update the warping function gradients here.
|
||||
"""
|
||||
self.Y_normalized[:] = self.transform_data()
|
||||
super(WarpedGP, self).parameters_changed()
|
||||
|
||||
Kiy = self.posterior.woodbury_vector.flatten()
|
||||
|
||||
grad_y = self.warping_function.fgrad_y(self.Y_untransformed)
|
||||
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed,
|
||||
return_covar_chain=True)
|
||||
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
|
||||
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
|
||||
|
||||
warping_grads = -dquad_dpsi + djac_dpsi
|
||||
|
||||
self.warping_function.psi.gradient[:] = warping_grads[:, :-1]
|
||||
self.warping_function.d.gradient[:] = warping_grads[0, -1]
|
||||
self.warping_function.update_grads(self.Y_untransformed, Kiy)
|
||||
|
||||
def transform_data(self):
|
||||
Y = self.warping_function.f(self.Y_untransformed.copy()).copy()
|
||||
return Y
|
||||
|
||||
def log_likelihood(self):
|
||||
"""
|
||||
Notice we add the jacobian of the warping function here.
|
||||
"""
|
||||
ll = GP.log_likelihood(self)
|
||||
jacobian = self.warping_function.fgrad_y(self.Y_untransformed)
|
||||
return ll + np.log(jacobian).sum()
|
||||
|
|
@ -73,36 +64,42 @@ class WarpedGP(GP):
|
|||
arg2 = np.ones(shape=gh_samples.shape).dot(mean.T)
|
||||
return self.warping_function.f_inv(arg1 + arg2, y=pred_init)
|
||||
|
||||
def _get_warped_mean(self, mean, std, pred_init=None, deg_gauss_hermite=100):
|
||||
def _get_warped_mean(self, mean, std, pred_init=None, deg_gauss_hermite=20):
|
||||
"""
|
||||
Calculate the warped mean by using Gauss-Hermite quadrature.
|
||||
"""
|
||||
gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite)
|
||||
gh_samples = gh_samples[:,None]
|
||||
gh_weights = gh_weights[None,:]
|
||||
gh_samples = gh_samples[:, None]
|
||||
gh_weights = gh_weights[None, :]
|
||||
return gh_weights.dot(self._get_warped_term(mean, std, gh_samples)) / np.sqrt(np.pi)
|
||||
|
||||
def _get_warped_variance(self, mean, std, pred_init=None, deg_gauss_hermite=100):
|
||||
def _get_warped_variance(self, mean, std, pred_init=None, deg_gauss_hermite=20):
|
||||
"""
|
||||
Calculate the warped variance by using Gauss-Hermite quadrature.
|
||||
"""
|
||||
gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite)
|
||||
gh_samples = gh_samples[:,None]
|
||||
gh_weights = gh_weights[None,:]
|
||||
gh_samples = gh_samples[:, None]
|
||||
gh_weights = gh_weights[None, :]
|
||||
arg1 = gh_weights.dot(self._get_warped_term(mean, std, gh_samples,
|
||||
pred_init=pred_init) ** 2) / np.sqrt(np.pi)
|
||||
arg2 = self._get_warped_mean(mean, std, pred_init=pred_init,
|
||||
deg_gauss_hermite=deg_gauss_hermite)
|
||||
return arg1 - (arg2 ** 2)
|
||||
|
||||
def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None,
|
||||
median=False, deg_gauss_hermite=100):
|
||||
# normalize X values
|
||||
# Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
|
||||
mu, var = GP._raw_predict(self, Xnew)
|
||||
|
||||
def predict(self, Xnew, kern=None, pred_init=None, Y_metadata=None,
|
||||
median=False, deg_gauss_hermite=20, likelihood=None):
|
||||
"""
|
||||
Prediction results depend on:
|
||||
- The value of the self.predict_in_warped_space flag
|
||||
- The median flag passed as argument
|
||||
The likelihood keyword is never used, it is just to follow the plotting API.
|
||||
"""
|
||||
#mu, var = GP._raw_predict(self, Xnew)
|
||||
# now push through likelihood
|
||||
mean, var = self.likelihood.predictive_values(mu, var)
|
||||
#mean, var = self.likelihood.predictive_values(mu, var)
|
||||
|
||||
mean, var = super(WarpedGP, self).predict(Xnew, kern=kern, full_cov=False, likelihood=likelihood)
|
||||
|
||||
|
||||
if self.predict_in_warped_space:
|
||||
std = np.sqrt(var)
|
||||
|
|
@ -116,13 +113,9 @@ class WarpedGP(GP):
|
|||
else:
|
||||
wmean = mean
|
||||
wvar = var
|
||||
|
||||
if self.scale_data:
|
||||
pred = self._unscale_data(pred)
|
||||
|
||||
return wmean, wvar
|
||||
|
||||
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
|
||||
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, kern=None):
|
||||
"""
|
||||
Get the predictive quantiles around the prediction at X
|
||||
|
||||
|
|
@ -133,19 +126,38 @@ class WarpedGP(GP):
|
|||
:returns: list of quantiles for each X and predictive quantiles for interval combination
|
||||
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
|
||||
"""
|
||||
m, v = self._raw_predict(X, full_cov=False)
|
||||
if self.normalizer is not None:
|
||||
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
|
||||
a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
|
||||
#return [a, b]
|
||||
if not self.predict_in_warped_space:
|
||||
return [a, b]
|
||||
#print a.shape
|
||||
new_a = self.warping_function.f_inv(a)
|
||||
new_b = self.warping_function.f_inv(b)
|
||||
qs = super(WarpedGP, self).predict_quantiles(X, quantiles, Y_metadata=Y_metadata, likelihood=likelihood, kern=kern)
|
||||
if self.predict_in_warped_space:
|
||||
return [self.warping_function.f_inv(q) for q in qs]
|
||||
return qs
|
||||
#m, v = self._raw_predict(X, full_cov=False)
|
||||
#if self.normalizer is not None:
|
||||
# m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
|
||||
#a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
|
||||
#if not self.predict_in_warped_space:
|
||||
# return [a, b]
|
||||
#new_a = self.warping_function.f_inv(a)
|
||||
#new_b = self.warping_function.f_inv(b)
|
||||
#return [new_a, new_b]
|
||||
|
||||
return [new_a, new_b]
|
||||
#return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
|
||||
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
|
||||
"""
|
||||
Calculation of the log predictive density. Notice we add
|
||||
the jacobian of the warping function here.
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param x_test: test locations (x_{*})
|
||||
:type x_test: (Nx1) array
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
:param Y_metadata: metadata associated with the test points
|
||||
"""
|
||||
mu_star, var_star = self._raw_predict(x_test)
|
||||
fy = self.warping_function.f(y_test)
|
||||
ll_lpd = self.likelihood.log_predictive_density(fy, mu_star, var_star, Y_metadata=Y_metadata)
|
||||
return ll_lpd + np.log(self.warping_function.fgrad_y(y_test))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@ -61,6 +61,8 @@ class AbstractPlottingLibrary(object):
|
|||
"""
|
||||
Get a new figure with nrows and ncolumns subplots.
|
||||
Does not initialize the canvases yet.
|
||||
|
||||
There is individual kwargs for the individual plotting libraries to use.
|
||||
"""
|
||||
raise NotImplementedError("Implement all plot functions in AbstractPlottingLibrary in order to use your own plotting library")
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
import numpy as np
|
||||
from scipy import sparse
|
||||
import itertools
|
||||
from ...models import WarpedGP
|
||||
|
||||
def in_ipynb():
|
||||
try:
|
||||
|
|
@ -295,6 +296,10 @@ def get_x_y_var(model):
|
|||
Y = model.Y.values
|
||||
except AttributeError:
|
||||
Y = model.Y
|
||||
|
||||
if isinstance(model, WarpedGP) and not model.predict_in_warped_space:
|
||||
Y = model.Y_normalized
|
||||
|
||||
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
|
||||
return X, X_variance, Y
|
||||
|
||||
|
|
@ -381,4 +386,4 @@ def x_frame2D(X,plot_limits=None,resolution=None):
|
|||
resolution = resolution or 50
|
||||
xx, yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
|
||||
Xnew = np.c_[xx.flat, yy.flat]
|
||||
return Xnew, xx, yy, xmin, xmax
|
||||
return Xnew, xx, yy, xmin, xmax
|
||||
|
|
|
|||
|
|
@ -91,18 +91,32 @@ class MiscTests(unittest.TestCase):
|
|||
k = GPy.kern.RBF(1)
|
||||
m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
|
||||
m2[:] = m[:]
|
||||
|
||||
mu1, var1 = m.predict(m.X, full_cov=True)
|
||||
mu2, var2 = m2.predict(m2.X, full_cov=True)
|
||||
np.testing.assert_allclose(mu1, (mu2*std)+mu)
|
||||
np.testing.assert_allclose(var1, var2)
|
||||
np.testing.assert_allclose(var1, var2*std**2)
|
||||
|
||||
mu1, var1 = m.predict(m.X, full_cov=False)
|
||||
mu2, var2 = m2.predict(m2.X, full_cov=False)
|
||||
|
||||
np.testing.assert_allclose(mu1, (mu2*std)+mu)
|
||||
np.testing.assert_allclose(var1, var2)
|
||||
np.testing.assert_allclose(var1, var2*std**2)
|
||||
|
||||
q50n = m.predict_quantiles(m.X, (50,))
|
||||
q50 = m2.predict_quantiles(m2.X, (50,))
|
||||
|
||||
np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
|
||||
|
||||
# Test variance component:
|
||||
qs = np.array([2.5, 97.5])
|
||||
# The quantiles get computed before unormalization
|
||||
# And transformed using the mean transformation:
|
||||
c = np.random.choice(self.X.shape[0])
|
||||
q95 = m2.predict_quantiles(self.X[[c]], qs)
|
||||
mu, var = m2.predict(self.X[[c]])
|
||||
from scipy.stats import norm
|
||||
np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten())
|
||||
|
||||
def check_jacobian(self):
|
||||
try:
|
||||
|
|
@ -361,51 +375,95 @@ class MiscTests(unittest.TestCase):
|
|||
preds = m.predict(self.X)
|
||||
|
||||
warp_k = GPy.kern.RBF(1)
|
||||
warp_f = GPy.util.warping_functions.IdentityFunction()
|
||||
warp_m = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k, warping_function=warp_f)
|
||||
warp_f = GPy.util.warping_functions.IdentityFunction(closed_inverse=False)
|
||||
warp_m = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k,
|
||||
warping_function=warp_f)
|
||||
warp_m.optimize()
|
||||
warp_preds = warp_m.predict(self.X)
|
||||
|
||||
np.testing.assert_almost_equal(preds, warp_preds)
|
||||
warp_k_exact = GPy.kern.RBF(1)
|
||||
warp_f_exact = GPy.util.warping_functions.IdentityFunction()
|
||||
warp_m_exact = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k_exact,
|
||||
warping_function=warp_f_exact)
|
||||
warp_m_exact.optimize()
|
||||
warp_preds_exact = warp_m_exact.predict(self.X)
|
||||
|
||||
np.testing.assert_almost_equal(preds, warp_preds, decimal=4)
|
||||
np.testing.assert_almost_equal(preds, warp_preds_exact, decimal=4)
|
||||
|
||||
@unittest.skip('Comment this to plot the modified sine function')
|
||||
def test_warped_gp_sine(self):
|
||||
def test_warped_gp_log(self):
|
||||
"""
|
||||
A test replicating the sine regression problem from
|
||||
Snelson's paper.
|
||||
A WarpedGP with the log warping function should be
|
||||
equal to a standard GP with log labels.
|
||||
Note that we predict the median here.
|
||||
"""
|
||||
k = GPy.kern.RBF(1)
|
||||
Y = np.abs(self.Y)
|
||||
logY = np.log(Y)
|
||||
m = GPy.models.GPRegression(self.X, logY, kernel=k)
|
||||
m.optimize()
|
||||
preds = m.predict(self.X)[0]
|
||||
|
||||
warp_k = GPy.kern.RBF(1)
|
||||
warp_f = GPy.util.warping_functions.LogFunction(closed_inverse=False)
|
||||
warp_m = GPy.models.WarpedGP(self.X, Y, kernel=warp_k,
|
||||
warping_function=warp_f)
|
||||
warp_m.optimize()
|
||||
warp_preds = warp_m.predict(self.X, median=True)[0]
|
||||
|
||||
warp_k_exact = GPy.kern.RBF(1)
|
||||
warp_f_exact = GPy.util.warping_functions.LogFunction()
|
||||
warp_m_exact = GPy.models.WarpedGP(self.X, Y, kernel=warp_k_exact,
|
||||
warping_function=warp_f_exact)
|
||||
warp_m_exact.optimize(messages=True)
|
||||
warp_preds_exact = warp_m_exact.predict(self.X, median=True)[0]
|
||||
|
||||
np.testing.assert_almost_equal(np.exp(preds), warp_preds, decimal=4)
|
||||
np.testing.assert_almost_equal(np.exp(preds), warp_preds_exact, decimal=4)
|
||||
|
||||
def test_warped_gp_cubic_sine(self, max_iters=100):
|
||||
"""
|
||||
A test replicating the cubic sine regression problem from
|
||||
Snelson's paper. This test doesn't have any assertions, it's
|
||||
just to ensure coverage of the tanh warping function code.
|
||||
"""
|
||||
X = (2 * np.pi) * np.random.random(151) - np.pi
|
||||
Y = np.sin(X) + np.random.normal(0,0.1,151)
|
||||
Y = np.exp(Y) - 5
|
||||
#Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y]) + 0
|
||||
Y = np.sin(X) + np.random.normal(0,0.2,151)
|
||||
Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y])
|
||||
X = X[:, None]
|
||||
Y = Y[:, None]
|
||||
|
||||
#np.seterr(over='raise')
|
||||
import matplotlib.pyplot as plt
|
||||
warp_k = GPy.kern.RBF(1)
|
||||
warp_f = GPy.util.warping_functions.TanhWarpingFunction_d(n_terms=2)
|
||||
warp_m = GPy.models.WarpedGP(X[:, None], Y[:, None], kernel=warp_k, warping_function=warp_f)
|
||||
#warp_m['.*variance.*'].constrain_fixed(0.25)
|
||||
#warp_m['.*lengthscale.*'].constrain_fixed(1)
|
||||
#warp_m['warp_tanh.d'].constrain_fixed(1)
|
||||
#warp_m.randomize()
|
||||
#warp_m['.*warp_tanh.psi*'][:,0:2].constrain_bounded(0,100)
|
||||
#warp_m['.*warp_tanh.psi*'][:,0:1].constrain_fixed(1)
|
||||
|
||||
#print(warp_m.checkgrad())
|
||||
#warp_m.plot()
|
||||
#plt.show()
|
||||
|
||||
warp_m.optimize_restarts(parallel=True, robust=True)
|
||||
#print(warp_m.checkgrad())
|
||||
print(warp_m)
|
||||
print(warp_m['.*warp.*'])
|
||||
warp_m = GPy.models.WarpedGP(X, Y)#, kernel=warp_k)#, warping_function=warp_f)
|
||||
warp_m['.*\.d'].constrain_fixed(1.0)
|
||||
warp_m.optimize_restarts(parallel=False, robust=False, num_restarts=5,
|
||||
max_iters=max_iters)
|
||||
warp_m.predict(X)
|
||||
warp_m.predict_quantiles(X)
|
||||
warp_m.log_predictive_density(X, Y)
|
||||
warp_m.predict_in_warped_space = False
|
||||
warp_m.plot()
|
||||
warp_m.predict_in_warped_space = True
|
||||
warp_m.plot()
|
||||
warp_f.plot(X.min()-10, X.max()+10)
|
||||
plt.show()
|
||||
|
||||
def test_offset_regression(self):
|
||||
#Tests GPy.models.GPOffsetRegression. Using two small time series
|
||||
#from a sine wave, we confirm the algorithm determines that the
|
||||
#likelihood is maximised when the offset hyperparameter is approximately
|
||||
#equal to the actual offset in X between the two time series.
|
||||
offset = 3
|
||||
X1 = np.arange(0,50,5.0)[:,None]
|
||||
X2 = np.arange(0+offset,50+offset,5.0)[:,None]
|
||||
X = np.vstack([X1,X2])
|
||||
ind = np.vstack([np.zeros([10,1]),np.ones([10,1])])
|
||||
X = np.hstack([X,ind])
|
||||
Y = np.sin((X[0:10,0])/30.0)[:,None]
|
||||
Y = np.vstack([Y,Y])
|
||||
|
||||
m = GPy.models.GPOffsetRegression(X,Y)
|
||||
m.rbf.lengthscale=5.0 #make it something other than one to check our gradients properly!
|
||||
assert m.checkgrad(), "Gradients of offset parameters don't match numerical approximations."
|
||||
m.optimize()
|
||||
assert np.abs(m.offset[0]-offset)<0.1, ("GPOffsetRegression model failing to estimate correct offset (value estimated = %0.2f instead of %0.2f)" % (m.offset[0], offset))
|
||||
|
||||
|
||||
class GradientTests(np.testing.TestCase):
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@
|
|||
#===============================================================================
|
||||
|
||||
import unittest, numpy as np
|
||||
import GPy
|
||||
|
||||
class TestDebug(unittest.TestCase):
|
||||
def test_checkFinite(self):
|
||||
|
|
@ -107,3 +108,50 @@ class TestDebug(unittest.TestCase):
|
|||
self.assertTrue(d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5])
|
||||
self.assertTrue(d[tuple(X[:,1])] == [1])
|
||||
|
||||
def test_offset_cluster(self):
|
||||
#Tests the GPy.util.cluster_with_offset.cluster utility with a small
|
||||
#test data set. Not using random noise just in case it occasionally
|
||||
#causes it not to cluster correctly.
|
||||
#groundtruth cluster identifiers are: [0,1,1,0]
|
||||
|
||||
#data contains a list of the four sets of time series (3 per data point)
|
||||
|
||||
data = [np.array([[ 2.18094245, 1.96529789, 2.00265523, 2.18218742, 2.06795428],
|
||||
[ 1.62254829, 1.75748448, 1.83879347, 1.87531326, 1.52503496],
|
||||
[ 1.54589609, 1.61607914, 2.00463192, 1.48771394, 1.63339218]]),
|
||||
np.array([[ 2.86766106, 2.97953437, 2.91958876, 2.92510506, 3.03239241],
|
||||
[ 2.57368423, 2.59954886, 3.10000395, 2.75806125, 2.89865704],
|
||||
[ 2.58916318, 2.53698259, 2.63858411, 2.63102504, 2.51853901]]),
|
||||
np.array([[ 2.77834168, 2.9618564 , 2.88482141, 3.24259745, 2.9716821 ],
|
||||
[ 2.60675576, 2.67095624, 2.94824436, 2.80520631, 2.87247516],
|
||||
[ 2.49543562, 2.5492281 , 2.6505866 , 2.65015308, 2.59738616]]),
|
||||
np.array([[ 1.76783086, 2.21666738, 2.07939706, 1.9268263 , 2.23360121],
|
||||
[ 1.94305547, 1.94648592, 2.1278921 , 2.09481457, 2.08575238],
|
||||
[ 1.69336013, 1.72285186, 1.6339506 , 1.61212022, 1.39198698]])]
|
||||
|
||||
#inputs contains their associated X values
|
||||
|
||||
inputs = [np.array([[ 0. ],
|
||||
[ 0.68040097],
|
||||
[ 1.20316795],
|
||||
[ 1.798749 ],
|
||||
[ 2.14891733]]), np.array([[ 0. ],
|
||||
[ 0.51910637],
|
||||
[ 0.98259352],
|
||||
[ 1.57442965],
|
||||
[ 1.82515098]]), np.array([[ 0. ],
|
||||
[ 0.66645478],
|
||||
[ 1.59464591],
|
||||
[ 1.69769551],
|
||||
[ 1.80932752]]), np.array([[ 0. ],
|
||||
[ 0.87512108],
|
||||
[ 1.71881079],
|
||||
[ 2.67162871],
|
||||
[ 3.23761907]])]
|
||||
|
||||
#try doing the clustering
|
||||
active = GPy.util.cluster_with_offset.cluster(data,inputs)
|
||||
#check to see that the clustering has correctly clustered the time series.
|
||||
clusters = set([frozenset(cluster) for cluster in active])
|
||||
assert set([1,2]) in clusters, "Offset Clustering algorithm failed"
|
||||
assert set([0,3]) in clusters, "Offset Clustering algoirthm failed"
|
||||
|
|
|
|||
|
|
@ -16,3 +16,4 @@ from . import initialization
|
|||
from . import multioutput
|
||||
from . import parallel
|
||||
from . import functions
|
||||
from . import cluster_with_offset
|
||||
|
|
|
|||
184
GPy/util/cluster_with_offset.py
Normal file
184
GPy/util/cluster_with_offset.py
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
# Copyright (c) 2016, Mike Smith
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import GPy
|
||||
import numpy as np
|
||||
import sys #so I can print dots
|
||||
|
||||
def get_log_likelihood(inputs,data,clust):
|
||||
"""Get the LL of a combined set of clusters, ignoring time series offsets.
|
||||
|
||||
Get the log likelihood of a cluster without worrying about the fact
|
||||
different time series are offset. We're using it here really for those
|
||||
cases in which we only have one cluster to get the loglikelihood of.
|
||||
|
||||
arguments:
|
||||
inputs -- the 'X's in a list, one item per cluster
|
||||
data -- the 'Y's in a list, one item per cluster
|
||||
clust -- list of clusters to use
|
||||
|
||||
returns a tuple:
|
||||
log likelihood and the offset (which is always zero for this model)
|
||||
"""
|
||||
|
||||
S = data[0].shape[0] #number of time series
|
||||
|
||||
#build a new dataset from the clusters, by combining all clusters together
|
||||
X = np.zeros([0,1])
|
||||
Y = np.zeros([0,S])
|
||||
|
||||
#for each person in the cluster,
|
||||
#add their inputs and data to the new dataset
|
||||
for p in clust:
|
||||
X = np.vstack([X,inputs[p]])
|
||||
Y = np.vstack([Y,data[p].T])
|
||||
|
||||
#find the loglikelihood. We just add together the LL for each time series.
|
||||
#ll=0
|
||||
#for s in range(S):
|
||||
# m = GPy.models.GPRegression(X,Y[:,s][:,None])
|
||||
# m.optimize()
|
||||
# ll+=m.log_likelihood()
|
||||
|
||||
m = GPy.models.GPRegression(X,Y)
|
||||
m.optimize()
|
||||
ll=m.log_likelihood()
|
||||
return ll,0
|
||||
|
||||
def get_log_likelihood_offset(inputs,data,clust):
|
||||
"""Get the log likelihood of a combined set of clusters, fitting the offsets
|
||||
|
||||
arguments:
|
||||
inputs -- the 'X's in a list, one item per cluster
|
||||
data -- the 'Y's in a list, one item per cluster
|
||||
clust -- list of clusters to use
|
||||
|
||||
returns a tuple:
|
||||
log likelihood and the offset
|
||||
"""
|
||||
|
||||
#if we've only got one cluster, the model has an error, so we want to just
|
||||
#use normal GPRegression.
|
||||
if len(clust)==1:
|
||||
return get_log_likelihood(inputs,data,clust)
|
||||
|
||||
S = data[0].shape[0] #number of time series
|
||||
|
||||
X = np.zeros([0,2]) #notice the extra column, this is for the cluster index
|
||||
Y = np.zeros([0,S])
|
||||
|
||||
#for each person in the cluster, add their inputs and data to the new
|
||||
#dataset. Note we add an index identifying which person is which data point.
|
||||
#This is for the offset model to use, to allow it to know which data points
|
||||
#to shift.
|
||||
for i,p in enumerate(clust):
|
||||
idx = i*np.ones([inputs[p].shape[0],1])
|
||||
X = np.vstack([X,np.hstack([inputs[p],idx])])
|
||||
Y = np.vstack([Y,data[p].T])
|
||||
|
||||
m = GPy.models.GPOffsetRegression(X,Y)
|
||||
#TODO: How to select a sensible prior?
|
||||
m.offset.set_prior(GPy.priors.Gaussian(0,20))
|
||||
#TODO: Set a sensible start value for the length scale,
|
||||
#make it long to help the offset fit.
|
||||
|
||||
m.optimize()
|
||||
|
||||
ll = m.log_likelihood()
|
||||
offset = m.offset.values[0]
|
||||
return ll,offset
|
||||
|
||||
def cluster(data,inputs,verbose=False):
|
||||
"""Clusters data
|
||||
|
||||
Using the new offset model, this method uses a greedy algorithm to cluster
|
||||
the data. It starts with all the data points in separate clusters and tests
|
||||
whether combining them increases the overall log-likelihood (LL). It then
|
||||
iteratively joins pairs of clusters which cause the greatest increase in
|
||||
the LL, until no join increases the LL.
|
||||
|
||||
arguments:
|
||||
inputs -- the 'X's in a list, one item per cluster
|
||||
data -- the 'Y's in a list, one item per cluster
|
||||
|
||||
returns a list of the clusters.
|
||||
"""
|
||||
N=len(data)
|
||||
|
||||
|
||||
#Define a set of N active cluster
|
||||
active = []
|
||||
for p in range(0,N):
|
||||
active.append([p])
|
||||
|
||||
loglikes = np.zeros(len(active))
|
||||
loglikes[:] = None
|
||||
|
||||
pairloglikes = np.zeros([len(active),len(active)])
|
||||
pairloglikes[:] = None
|
||||
pairoffset = np.zeros([len(active),len(active)])
|
||||
|
||||
it = 0
|
||||
while True:
|
||||
|
||||
if verbose:
|
||||
it +=1
|
||||
print("Iteration %d" % it)
|
||||
|
||||
#Compute the log-likelihood of each cluster (add them together)
|
||||
for clusti in range(len(active)):
|
||||
if verbose:
|
||||
sys.stdout.write('.')
|
||||
sys.stdout.flush()
|
||||
if np.isnan(loglikes[clusti]):
|
||||
loglikes[clusti], unused_offset = get_log_likelihood_offset(inputs,data,[clusti])
|
||||
|
||||
#try combining with each other cluster...
|
||||
for clustj in range(clusti): #count from 0 to clustj-1
|
||||
temp = [clusti,clustj]
|
||||
if np.isnan(pairloglikes[clusti,clustj]):
|
||||
pairloglikes[clusti,clustj],pairoffset[clusti,clustj] = get_log_likelihood_offset(inputs,data,temp)
|
||||
|
||||
seploglikes = np.repeat(loglikes[:,None].T,len(loglikes),0)+np.repeat(loglikes[:,None],len(loglikes),1)
|
||||
loglikeimprovement = pairloglikes - seploglikes #how much likelihood improves with clustering
|
||||
top = np.unravel_index(np.nanargmax(pairloglikes-seploglikes), pairloglikes.shape)
|
||||
|
||||
#if loglikeimprovement.shape[0]<3:
|
||||
# #no more clustering to do - this shouldn't happen really unless
|
||||
# #we've set the threshold to apply clustering to less than 0
|
||||
# break
|
||||
|
||||
#if theres further clustering to be done...
|
||||
if loglikeimprovement[top[0],top[1]]>0:
|
||||
active[top[0]].extend(active[top[1]])
|
||||
offset=pairoffset[top[0],top[1]]
|
||||
inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset])
|
||||
data[top[0]] = np.hstack([data[top[0]],data[top[1]]])
|
||||
del inputs[top[1]]
|
||||
del data[top[1]]
|
||||
del active[top[1]]
|
||||
|
||||
#None = message to say we need to recalculate
|
||||
pairloglikes[:,top[0]] = None
|
||||
pairloglikes[top[0],:] = None
|
||||
pairloglikes = np.delete(pairloglikes,top[1],0)
|
||||
pairloglikes = np.delete(pairloglikes,top[1],1)
|
||||
loglikes[top[0]] = None
|
||||
loglikes = np.delete(loglikes,top[1])
|
||||
else:
|
||||
break
|
||||
|
||||
#if loglikeimprovement[top[0],top[1]]>0:
|
||||
# print "joined"
|
||||
# print top
|
||||
# print offset
|
||||
# print offsets
|
||||
# print offsets[top[1]]-offsets[top[0]]
|
||||
|
||||
#TODO Add a way to return the offsets applied to all the time series
|
||||
return active
|
||||
|
||||
#starttime = time.time()
|
||||
#active = cluster(data,inputs)
|
||||
#endtime = time.time()
|
||||
#print "TOTAL TIME %0.4f" % (endtime-starttime)
|
||||
|
|
@ -6,7 +6,7 @@ Created on Aug 27, 2014
|
|||
import logging
|
||||
import numpy as np
|
||||
|
||||
class Norm(object):
|
||||
class _Norm(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
def scale_by(self, Y):
|
||||
|
|
@ -18,7 +18,8 @@ class Norm(object):
|
|||
"""
|
||||
Project Y into normalized space
|
||||
"""
|
||||
raise NotImplementedError
|
||||
if not self.scaled():
|
||||
raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.")
|
||||
def inverse_mean(self, X):
|
||||
"""
|
||||
Project the normalized object X into space of Y
|
||||
|
|
@ -31,15 +32,46 @@ class Norm(object):
|
|||
Whether this Norm object has been initialized.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
class MeanNorm(Norm):
|
||||
|
||||
class Standardize(_Norm):
|
||||
def __init__(self):
|
||||
self.mean = None
|
||||
def scale_by(self, Y):
|
||||
Y = np.ma.masked_invalid(Y, copy=False)
|
||||
self.mean = Y.mean(0).view(np.ndarray)
|
||||
self.std = Y.std(0).view(np.ndarray)
|
||||
def normalize(self, Y):
|
||||
return Y-self.mean
|
||||
super(Standardize, self).normalize(Y)
|
||||
return (Y-self.mean)/self.std
|
||||
def inverse_mean(self, X):
|
||||
return X+self.mean
|
||||
return (X*self.std)+self.mean
|
||||
def inverse_variance(self, var):
|
||||
return (var*(self.std**2))
|
||||
def scaled(self):
|
||||
return self.mean is not None
|
||||
|
||||
# Inverse variance to be implemented, disabling for now
|
||||
# If someone in the future want to implement this,
|
||||
# we need to implement the inverse variance for
|
||||
# normalization. This means, we need to know the factor
|
||||
# for the variance to be multiplied to the variance in
|
||||
# normalized space. This is easy to compute for standardization
|
||||
# (see above) but gets tricky here.
|
||||
# class Normalize(_Norm):
|
||||
# def __init__(self):
|
||||
# self.ymin = None
|
||||
# self.ymax = None
|
||||
# def scale_by(self, Y):
|
||||
# Y = np.ma.masked_invalid(Y, copy=False)
|
||||
# self.ymin = Y.min(0).view(np.ndarray)
|
||||
# self.ymax = Y.max(0).view(np.ndarray)
|
||||
# def normalize(self, Y):
|
||||
# super(Normalize, self).normalize(Y)
|
||||
# return (Y - self.ymin) / (self.ymax - self.ymin) - .5
|
||||
# def inverse_mean(self, X):
|
||||
# return (X + .5) * (self.ymax - self.ymin) + self.ymin
|
||||
# def inverse_variance(self, var):
|
||||
#
|
||||
# return (var*(self.std**2))
|
||||
# def scaled(self):
|
||||
# return (self.ymin is not None) and (self.ymax is not None)
|
||||
|
|
@ -4,6 +4,7 @@
|
|||
import numpy as np
|
||||
from ..core.parameterization import Parameterized, Param
|
||||
from paramz.transformations import Logexp
|
||||
import sys
|
||||
|
||||
|
||||
class WarpingFunction(Parameterized):
|
||||
|
|
@ -14,6 +15,7 @@ class WarpingFunction(Parameterized):
|
|||
|
||||
def __init__(self, name):
|
||||
super(WarpingFunction, self).__init__(name=name)
|
||||
self.rate = 0.1
|
||||
|
||||
def f(self, y, psi):
|
||||
"""function transformation
|
||||
|
|
@ -29,15 +31,32 @@ class WarpingFunction(Parameterized):
|
|||
"""gradient of f w.r.t to y"""
|
||||
raise NotImplementedError
|
||||
|
||||
def f_inv(self, z, psi):
|
||||
"""inverse function transformation"""
|
||||
raise NotImplementedError
|
||||
def f_inv(self, z, max_iterations=250, y=None):
|
||||
"""
|
||||
Calculate the numerical inverse of f. This should be
|
||||
overwritten for specific warping functions where the
|
||||
inverse can be found in closed form.
|
||||
|
||||
def _get_param_names(self):
|
||||
raise NotImplementedError
|
||||
:param max_iterations: maximum number of N.R. iterations
|
||||
"""
|
||||
|
||||
z = z.copy()
|
||||
y = np.ones_like(z)
|
||||
|
||||
it = 0
|
||||
update = np.inf
|
||||
while np.abs(update).sum() > 1e-10 and it < max_iterations:
|
||||
fy = self.f(y)
|
||||
fgrady = self.fgrad_y(y)
|
||||
update = (fy - z) / fgrady
|
||||
y -= self.rate * update
|
||||
it += 1
|
||||
#if it == max_iterations:
|
||||
# print("WARNING!!! Maximum number of iterations reached in f_inv ")
|
||||
# print("Sum of roots: %.4f" % np.sum(fy - z))
|
||||
return y
|
||||
|
||||
def plot(self, xmin, xmax):
|
||||
psi = self.psi
|
||||
y = np.arange(xmin, xmax, 0.01)
|
||||
f_y = self.f(y)
|
||||
from matplotlib import pyplot as plt
|
||||
|
|
@ -49,183 +68,59 @@ class WarpingFunction(Parameterized):
|
|||
plt.show()
|
||||
|
||||
|
||||
class TanhWarpingFunction(WarpingFunction):
|
||||
|
||||
def __init__(self, n_terms=3):
|
||||
"""n_terms specifies the number of tanh terms to be used"""
|
||||
self.n_terms = n_terms
|
||||
self.num_parameters = 3 * self.n_terms
|
||||
super(TanhWarpingFunction, self).__init__(name='warp_tanh')
|
||||
|
||||
def f(self, y, psi):
|
||||
class TanhFunction(WarpingFunction):
|
||||
"""
|
||||
This is the function proposed in Snelson et al.:
|
||||
A sum of tanh functions with linear trends outside
|
||||
the range. Notice the term 'd', which scales the
|
||||
linear trend.
|
||||
"""
|
||||
def __init__(self, n_terms=3, initial_y=None):
|
||||
"""
|
||||
transform y with f using parameter vector psi
|
||||
psi = [[a,b,c]]
|
||||
::math::`f = \\sum_{terms} a * tanh(b*(y+c))`
|
||||
n_terms specifies the number of tanh terms to be used
|
||||
"""
|
||||
#1. check that number of params is consistent
|
||||
assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
|
||||
assert psi.shape[1] == 3, 'inconsistent parameter dimensions'
|
||||
|
||||
#2. exponentiate the a and b (positive!)
|
||||
mpsi = psi.copy()
|
||||
|
||||
#3. transform data
|
||||
z = y.copy()
|
||||
for i in range(len(mpsi)):
|
||||
a,b,c = mpsi[i]
|
||||
z += a*np.tanh(b*(y+c))
|
||||
return z
|
||||
|
||||
def f_inv(self, y, psi, iterations=10):
|
||||
"""
|
||||
calculate the numerical inverse of f
|
||||
:param iterations: number of N.R. iterations
|
||||
"""
|
||||
|
||||
y = y.copy()
|
||||
z = np.ones_like(y)
|
||||
for i in range(iterations):
|
||||
z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi)
|
||||
return z
|
||||
|
||||
def fgrad_y(self, y, psi, return_precalc=False):
|
||||
"""
|
||||
gradient of f w.r.t to y ([N x 1])
|
||||
returns: Nx1 vector of derivatives, unless return_precalc is true,
|
||||
then it also returns the precomputed stuff
|
||||
"""
|
||||
|
||||
mpsi = psi.copy()
|
||||
|
||||
# vectorized version
|
||||
|
||||
# S = (mpsi[:,1]*(y + mpsi[:,2])).T
|
||||
S = (mpsi[:,1]*(y[:,:,None] + mpsi[:,2])).T
|
||||
R = np.tanh(S)
|
||||
D = 1-R**2
|
||||
|
||||
# GRAD = (1+(mpsi[:,0:1]*mpsi[:,1:2]*D).sum(axis=0))[:,np.newaxis]
|
||||
GRAD = (1+(mpsi[:,0:1][:,:,None]*mpsi[:,1:2][:,:,None]*D).sum(axis=0)).T
|
||||
|
||||
if return_precalc:
|
||||
# return GRAD,S.sum(axis=1),R.sum(axis=1),D.sum(axis=1)
|
||||
return GRAD, S, R, D
|
||||
|
||||
return GRAD
|
||||
|
||||
def fgrad_y_psi(self, y, psi, return_covar_chain=False):
|
||||
"""
|
||||
gradient of f w.r.t to y and psi
|
||||
returns: NxIx3 tensor of partial derivatives
|
||||
"""
|
||||
|
||||
# 1. exponentiate the a and b (positive!)
|
||||
mpsi = psi.copy()
|
||||
w, s, r, d = self.fgrad_y(y, psi, return_precalc = True)
|
||||
|
||||
gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 3))
|
||||
for i in range(len(mpsi)):
|
||||
a,b,c = mpsi[i]
|
||||
gradients[:,:,i,0] = (b*(1.0/np.cosh(s[i]))**2).T
|
||||
gradients[:,:,i,1] = a*(d[i] - 2.0*s[i]*r[i]*(1.0/np.cosh(s[i]))**2).T
|
||||
gradients[:,:,i,2] = (-2.0*a*(b**2)*r[i]*((1.0/np.cosh(s[i]))**2)).T
|
||||
|
||||
if return_covar_chain:
|
||||
covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 3))
|
||||
|
||||
for i in range(len(mpsi)):
|
||||
a,b,c = mpsi[i]
|
||||
covar_grad_chain[:, :, i, 0] = (r[i]).T
|
||||
covar_grad_chain[:, :, i, 1] = (a*(y + c) * ((1.0/np.cosh(s[i]))**2).T)
|
||||
covar_grad_chain[:, :, i, 2] = a*b*((1.0/np.cosh(s[i]))**2).T
|
||||
|
||||
return gradients, covar_grad_chain
|
||||
|
||||
return gradients
|
||||
|
||||
def _get_param_names(self):
|
||||
variables = ['a', 'b', 'c']
|
||||
names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[])
|
||||
return names
|
||||
|
||||
|
||||
class TanhWarpingFunction_d(WarpingFunction):
|
||||
|
||||
def __init__(self, n_terms=3):
|
||||
"""n_terms specifies the number of tanh terms to be used"""
|
||||
self.n_terms = n_terms
|
||||
self.num_parameters = 3 * self.n_terms + 1
|
||||
self.psi = np.ones((self.n_terms, 3))
|
||||
|
||||
super(TanhWarpingFunction_d, self).__init__(name='warp_tanh')
|
||||
super(TanhFunction, self).__init__(name='warp_tanh')
|
||||
self.psi = Param('psi', self.psi)
|
||||
self.psi[:, :2].constrain_positive()
|
||||
|
||||
self.d = Param('%s' % ('d'), 1.0, Logexp())
|
||||
self.link_parameter(self.psi)
|
||||
self.link_parameter(self.d)
|
||||
self.initial_y = initial_y
|
||||
|
||||
def f(self, y):
|
||||
"""
|
||||
Transform y with f using parameter vector psi
|
||||
psi = [[a,b,c]]
|
||||
|
||||
:math:`f = \\sum_{terms} a * tanh(b*(y+c))`
|
||||
:math:`f = (y * d) + \\sum_{terms} a * tanh(b *(y + c))`
|
||||
"""
|
||||
#1. check that number of params is consistent
|
||||
# assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
|
||||
# assert psi.shape[1] == 4, 'inconsistent parameter dimensions'
|
||||
|
||||
d = self.d
|
||||
mpsi = self.psi
|
||||
|
||||
#3. transform data
|
||||
z = d*y.copy()
|
||||
z = d * y.copy()
|
||||
for i in range(len(mpsi)):
|
||||
a,b,c = mpsi[i]
|
||||
z += a*np.tanh(b*(y+c))
|
||||
a, b, c = mpsi[i]
|
||||
z += a * np.tanh(b * (y + c))
|
||||
return z
|
||||
|
||||
def f_inv(self, z, max_iterations=1000, y=None):
|
||||
"""
|
||||
calculate the numerical inverse of f
|
||||
|
||||
:param max_iterations: maximum number of N.R. iterations
|
||||
"""
|
||||
|
||||
z = z.copy()
|
||||
if y is None:
|
||||
y = np.ones_like(z)
|
||||
|
||||
it = 0
|
||||
update = np.inf
|
||||
|
||||
while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
|
||||
fy = self.f(y)
|
||||
fgrady = self.fgrad_y(y)
|
||||
update = (fy - z)/fgrady
|
||||
y -= update
|
||||
it += 1
|
||||
if it == max_iterations:
|
||||
print("WARNING!!! Maximum number of iterations reached in f_inv ")
|
||||
return y
|
||||
|
||||
def fgrad_y(self, y, return_precalc=False):
|
||||
"""
|
||||
gradient of f w.r.t to y ([N x 1])
|
||||
|
||||
:returns: Nx1 vector of derivatives, unless return_precalc is true, then it also returns the precomputed stuff
|
||||
:returns: Nx1 vector of derivatives, unless return_precalc is true,
|
||||
then it also returns the precomputed stuff
|
||||
"""
|
||||
d = self.d
|
||||
mpsi = self.psi
|
||||
|
||||
# vectorized version
|
||||
S = (mpsi[:,1]*(y[:,:,None] + mpsi[:,2])).T
|
||||
S = (mpsi[:,1] * (y[:,:,None] + mpsi[:,2])).T
|
||||
R = np.tanh(S)
|
||||
D = 1-R**2
|
||||
D = 1 - (R ** 2)
|
||||
|
||||
GRAD = (d + (mpsi[:,0:1][:,:,None]*mpsi[:,1:2][:,:,None]*D).sum(axis=0)).T
|
||||
GRAD = (d + (mpsi[:,0:1][:,:,None] * mpsi[:,1:2][:,:,None] * D).sum(axis=0)).T
|
||||
|
||||
if return_precalc:
|
||||
return GRAD, S, R, D
|
||||
|
|
@ -244,45 +139,79 @@ class TanhWarpingFunction_d(WarpingFunction):
|
|||
gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
|
||||
for i in range(len(mpsi)):
|
||||
a,b,c = mpsi[i]
|
||||
gradients[:,:,i,0] = (b*(1.0/np.cosh(s[i]))**2).T
|
||||
gradients[:,:,i,1] = a*(d[i] - 2.0*s[i]*r[i]*(1.0/np.cosh(s[i]))**2).T
|
||||
gradients[:,:,i,2] = (-2.0*a*(b**2)*r[i]*((1.0/np.cosh(s[i]))**2)).T
|
||||
gradients[:,:,0,3] = 1.0
|
||||
gradients[:, :, i, 0] = (b * (1.0/np.cosh(s[i])) ** 2).T
|
||||
gradients[:, :, i, 1] = a * (d[i] - 2.0 * s[i] * r[i] * (1.0/np.cosh(s[i])) ** 2).T
|
||||
gradients[:, :, i, 2] = (-2.0 * a * (b ** 2) * r[i] * ((1.0 / np.cosh(s[i])) ** 2)).T
|
||||
gradients[:, :, 0, 3] = 1.0
|
||||
|
||||
if return_covar_chain:
|
||||
covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
|
||||
|
||||
for i in range(len(mpsi)):
|
||||
a,b,c = mpsi[i]
|
||||
covar_grad_chain[:, :, i, 0] = (r[i]).T
|
||||
covar_grad_chain[:, :, i, 1] = (a*(y + c) * ((1.0/np.cosh(s[i]))**2).T)
|
||||
covar_grad_chain[:, :, i, 2] = a*b*((1.0/np.cosh(s[i]))**2).T
|
||||
covar_grad_chain[:, :, i, 1] = (a * (y + c) * ((1.0 / np.cosh(s[i])) ** 2).T)
|
||||
covar_grad_chain[:, :, i, 2] = a * b * ((1.0 / np.cosh(s[i])) ** 2).T
|
||||
covar_grad_chain[:, :, 0, 3] = y
|
||||
|
||||
return gradients, covar_grad_chain
|
||||
|
||||
return gradients
|
||||
|
||||
def _get_param_names(self):
|
||||
variables = ['a', 'b', 'c', 'd']
|
||||
names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[])
|
||||
names.append('warp_tanh_d')
|
||||
return names
|
||||
def update_grads(self, Y_untransformed, Kiy):
|
||||
grad_y = self.fgrad_y(Y_untransformed)
|
||||
grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed,
|
||||
return_covar_chain=True)
|
||||
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
|
||||
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
|
||||
|
||||
warping_grads = -dquad_dpsi + djac_dpsi
|
||||
|
||||
self.psi.gradient[:] = warping_grads[:, :-1]
|
||||
self.d.gradient[:] = warping_grads[0, -1]
|
||||
|
||||
|
||||
class LogFunction(WarpingFunction):
|
||||
"""
|
||||
Easy wrapper for applying a fixed log warping function to
|
||||
positive-only values.
|
||||
The closed_inverse flag should only be set to False for
|
||||
debugging and testing purposes.
|
||||
"""
|
||||
def __init__(self, closed_inverse=True):
|
||||
self.num_parameters = 0
|
||||
super(LogFunction, self).__init__(name='log')
|
||||
if closed_inverse:
|
||||
self.f_inv = self._f_inv
|
||||
|
||||
def f(self, y):
|
||||
return np.log(y)
|
||||
|
||||
def fgrad_y(self, y):
|
||||
return 1. / y
|
||||
|
||||
def update_grads(self, Y_untransformed, Kiy):
|
||||
pass
|
||||
|
||||
def fgrad_y_psi(self, y, return_covar_chain=False):
|
||||
if return_covar_chain:
|
||||
return 0, 0
|
||||
return 0
|
||||
|
||||
def _f_inv(self, z, y=None):
|
||||
return np.exp(z)
|
||||
|
||||
|
||||
class IdentityFunction(WarpingFunction):
|
||||
"""
|
||||
Identity warping function. This is for testing and sanity check purposes
|
||||
and should not be used in practice.
|
||||
The closed_inverse flag should only be set to False for
|
||||
debugging and testing purposes.
|
||||
"""
|
||||
def __init__(self):
|
||||
self.num_parameters = 4
|
||||
self.psi = Param('psi', np.zeros((1,3)))
|
||||
self.d = Param('%s' % ('d'), 1.0, Logexp())
|
||||
def __init__(self, closed_inverse=True):
|
||||
self.num_parameters = 0
|
||||
super(IdentityFunction, self).__init__(name='identity')
|
||||
self.link_parameter(self.psi)
|
||||
self.link_parameter(self.d)
|
||||
|
||||
if closed_inverse:
|
||||
self.f_inv = self._f_inv
|
||||
|
||||
def f(self, y):
|
||||
return y
|
||||
|
|
@ -290,11 +219,14 @@ class IdentityFunction(WarpingFunction):
|
|||
def fgrad_y(self, y):
|
||||
return np.ones(y.shape)
|
||||
|
||||
def update_grads(self, Y_untransformed, Kiy):
|
||||
pass
|
||||
|
||||
def fgrad_y_psi(self, y, return_covar_chain=False):
|
||||
gradients = np.zeros((y.shape[0], y.shape[1], len(self.psi), 4))
|
||||
if return_covar_chain:
|
||||
return gradients, gradients
|
||||
return gradients
|
||||
return 0, 0
|
||||
return 0
|
||||
|
||||
def f_inv(self, z, y=None):
|
||||
def _f_inv(self, z, y=None):
|
||||
return z
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ The Gaussian processes framework in Python.
|
|||
* Travis-CI [unit-tests](https://travis-ci.org/SheffieldML/GPy)
|
||||
* [](http://opensource.org/licenses/BSD-3-Clause)
|
||||
|
||||
[](https://travis-ci.org/SheffieldML/GPy) [](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [](https://coveralls.io/github/SheffieldML/GPy?branch=devel) [](http://codecov.io/github/SheffieldML/GPy?branch=devel) [](http://depsy.org/package/python/GPy) [](https://landscape.io/github/SheffieldML/GPy/devel)
|
||||
[](https://travis-ci.org/SheffieldML/GPy) [](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [](https://coveralls.io/github/SheffieldML/GPy?branch=devel) [](http://codecov.io/github/SheffieldML/GPy?branch=devel) [](http://depsy.org/package/python/GPy) [](https://landscape.io/github/SheffieldML/GPy/devel)
|
||||
|
||||
## Updated Structure
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ environment:
|
|||
secure: 8/ZjXFwtd1S7ixd7PJOpptupKKEDhm2da/q3unabJ00=
|
||||
COVERALLS_REPO_TOKEN:
|
||||
secure: d3Luic/ESkGaWnZrvWZTKrzO+xaVwJWaRCEP0F+K/9DQGPSRZsJ/Du5g3s4XF+tS
|
||||
gpy_version: 1.2.1
|
||||
gpy_version: 1.4.0
|
||||
matrix:
|
||||
- PYTHON_VERSION: 2.7
|
||||
MINICONDA: C:\Miniconda-x64
|
||||
|
|
@ -71,10 +71,10 @@ deploy_script:
|
|||
- echo password:%pip_access% >> %USERPROFILE%\\.pypirc
|
||||
- ps: >-
|
||||
if ($env:APPVEYOR_REPO_BRANCH -eq 'devel') {
|
||||
twine upload -r test dist/*
|
||||
twine upload --skip-existing -r test dist/*
|
||||
}
|
||||
elseif ($env:APPVEYOR_REPO_BRANCH -eq 'deploy') {
|
||||
twine upload dist/*
|
||||
twine upload --skip-existing dist/*
|
||||
}
|
||||
else {
|
||||
echo not deploying on other branches
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
[bumpversion]
|
||||
current_version = 1.2.1
|
||||
current_version = 1.4.0
|
||||
tag = False
|
||||
commit = True
|
||||
|
||||
|
|
|
|||
2
setup.py
2
setup.py
|
|
@ -148,7 +148,7 @@ setup(name = 'GPy',
|
|||
include_package_data = True,
|
||||
py_modules = ['GPy.__init__'],
|
||||
test_suite = 'GPy.testing',
|
||||
install_requires = ['numpy>=1.7', 'scipy>=0.16', 'six', 'paramz>=0.5.2'],
|
||||
install_requires = ['numpy>=1.7', 'scipy>=0.16', 'six', 'paramz>=0.6.6'],
|
||||
extras_require = {'docs':['sphinx'],
|
||||
'optional':['mpi4py',
|
||||
'ipython>=4.0.0',
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue