Merge pull request #426 from SheffieldML/devel

some fixes from issues and beckdaniels warped gp improvements
This commit is contained in:
Max Zwiessele 2016-08-17 16:29:50 +01:00 committed by GitHub
commit 2bb8161937
22 changed files with 722 additions and 310 deletions

View file

@ -14,6 +14,8 @@ from . import testing
from . import kern
from . import plotting
from .util import normalizer
# backwards compatibility
import sys
backwards_compatibility = ['lists_and_dicts', 'observable_array', 'ties_and_remappings', 'index_operations']

View file

@ -1 +1 @@
__version__ = "1.2.1"
__version__ = "1.4.0"

View file

@ -2,17 +2,17 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from GPy.core.model import Model
from paramz import ObsAr
from .model import Model
from .parameterization.variational import VariationalPosterior
from .mapping import Mapping
from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from GPy.core.parameterization.variational import VariationalPosterior
from ..util.normalizer import Standardize
from paramz import ObsAr
import logging
import warnings
from GPy.util.normalizer import MeanNorm
logger = logging.getLogger("GP")
class GP(Model):
@ -28,7 +28,7 @@ class GP(Model):
:param Norm normalizer:
normalize the outputs Y.
Prediction will be un-normalized using this normalizer.
If normalizer is None, we will normalize using MeanNorm.
If normalizer is None, we will normalize using Standardize.
If normalizer is False, no normalization will be done.
.. Note:: Multiple independent outputs are allowed using columns of Y
@ -49,7 +49,7 @@ class GP(Model):
logger.info("initializing Y")
if normalizer is True:
self.normalizer = MeanNorm()
self.normalizer = Standardize()
elif normalizer is False:
self.normalizer = None
else:
@ -64,6 +64,7 @@ class GP(Model):
self.Y_normalized = self.Y
else:
self.Y = Y
self.Y_normalized = self.Y
if Y.shape[0] != self.num_data:
#There can be cases where we want inputs than outputs, for example if we have multiple latent
@ -248,14 +249,16 @@ class GP(Model):
"""
#predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
if include_likelihood:
# now push through likelihood
if likelihood is None:
likelihood = self.likelihood
mu, var = likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
if self.normalizer is not None:
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
return mu, var
def predict_noiseless(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@ -300,11 +303,14 @@ class GP(Model):
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
"""
m, v = self._raw_predict(X, full_cov=False, kern=kern)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
if likelihood is None:
likelihood = self.likelihood
return likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
quantiles = likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
if self.normalizer is not None:
quantiles = [self.normalizer.inverse_mean(q) for q in quantiles]
return quantiles
def predictive_gradients(self, Xnew, kern=None):
"""

View file

@ -16,7 +16,7 @@ class Priorizable(Parameterizable):
def __setstate__(self, state):
super(Priorizable, self).__setstate__(state)
self._index_operations['priors'] = self.priors
#self._index_operations['priors'] = self.priors
#===========================================================================

View file

@ -550,3 +550,34 @@ def parametric_mean_function(max_iters=100, optimize=True, plot=True):
return m
def warped_gp_cubic_sine(max_iters=100):
"""
A test replicating the cubic sine regression problem from
Snelson's paper.
"""
X = (2 * np.pi) * np.random.random(151) - np.pi
Y = np.sin(X) + np.random.normal(0,0.2,151)
Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y])
X = X[:, None]
Y = Y[:, None]
warp_k = GPy.kern.RBF(1)
warp_f = GPy.util.warping_functions.TanhFunction(n_terms=2)
warp_m = GPy.models.WarpedGP(X, Y, kernel=warp_k, warping_function=warp_f)
warp_m['.*\.d'].constrain_fixed(1.0)
m = GPy.models.GPRegression(X, Y)
m.optimize_restarts(parallel=False, robust=True, num_restarts=5, max_iters=max_iters)
warp_m.optimize_restarts(parallel=False, robust=True, num_restarts=5, max_iters=max_iters)
#m.optimize(max_iters=max_iters)
#warp_m.optimize(max_iters=max_iters)
print(warp_m)
print(warp_m['.*warp.*'])
warp_m.predict_in_warped_space = False
warp_m.plot(title="Warped GP - Latent space")
warp_m.predict_in_warped_space = True
warp_m.plot(title="Warped GP - Warped space")
m.plot(title="Standard GP")
warp_m.plot_warping()
pb.show()

View file

@ -13,15 +13,21 @@ class Add(CombinationKernel):
propagates gradients through.
This kernel will take over the active dims of it's subkernels passed in.
NOTE: The subkernels will be copies of the original kernels, to prevent
unexpected behavior.
"""
def __init__(self, subkerns, name='sum'):
for i, kern in enumerate(subkerns[:]):
_newkerns = []
for kern in subkerns:
if isinstance(kern, Add):
del subkerns[i]
for part in kern.parts[::-1]:
for part in kern.parts:
#kern.unlink_parameter(part)
subkerns.insert(i, part.copy())
super(Add, self).__init__(subkerns, name)
_newkerns.append(part.copy())
else:
_newkerns.append(kern.copy())
super(Add, self).__init__(_newkerns, name)
self._exact_psicomp = self._check_exact_psicomp()
def _check_exact_psicomp(self):

View file

@ -135,9 +135,7 @@ class Bias(Static):
def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
ret = np.empty(shape, dtype=np.float64)
ret[:] = self.variance
return ret
return np.full(shape, self.variance, dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum()
@ -146,9 +144,7 @@ class Bias(Static):
self.variance.gradient = dL_dKdiag.sum()
def psi2(self, Z, variational_posterior):
ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret
return np.full((Z.shape[0], Z.shape[0]), self.variance*self.variance*variational_posterior.shape[0], dtype=np.float64)
def psi2n(self, Z, variational_posterior):
ret = np.empty((variational_posterior.mean.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)

View file

@ -26,3 +26,5 @@ from .dpgplvm import DPBayesianGPLVM
from .state_space_model import StateSpace
from .ibp_lfm import IBPLFM
from .gp_offset_regression import GPOffsetRegression

View file

@ -0,0 +1,95 @@
# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Written by Mike Smith. michaeltsmith.org.uk
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
from ..core import Param
class GPOffsetRegression(GP):
"""
Gaussian Process model for offset regression
:param X: input observations, we assume for this class that this has one dimension of actual inputs and the last dimension should be the index of the cluster (so X should be Nx2)
:param Y: observed values (Nx1?)
:param kernel: a GPy kernel, defaults to rbf
:param Norm normalizer: [False]
:param noise_var: the noise variance for Gaussian likelhood, defaults to 1.
Normalize Y with the norm given.
If normalizer is False, no normalization will be done
If it is None, we use GaussianNorm(alization)
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, Y_metadata=None, normalizer=None, noise_var=1., mean_function=None):
assert X.shape[1]>1, "Need at least two input dimensions, as last dimension is the label of the cluster"
if kernel is None:
kernel = kern.RBF(X.shape[1]-1)
#self._log_marginal_likelihood = np.nan #todo
likelihood = likelihoods.Gaussian(variance=noise_var)
self.X_fixed = X[:,:-1]
self.selected = np.array([int(x) for x in X[:,-1]])
super(GPOffsetRegression, self).__init__(X, Y, kernel, likelihood, name='GP offset regression', Y_metadata=Y_metadata, normalizer=normalizer, mean_function=mean_function)
maxcluster = np.max(self.selected)
self.offset = Param('offset', np.zeros(maxcluster))
#self.offset.set_prior(...)
self.link_parameter(self.offset)
#def dr_doffset(self, X, sel): #how much r changes wrt the offset hyperparameters
#def dL_doffset(self, X, sel):
# dL_dr = self.dK_dr_via_X(X, X) * dL_dK
def dr_doffset(self,X,sel,delta):
#given an input matrix, X and the offsets (delta)
#finds dr/dDelta
#returns them as a list, one for each offset (delta).
#get the input values
#a matrix G represents the effect of increasing the offset on the radius passed to the kernel for each input. For example
#what effect will increasing offset 4 have on the kernel output of inputs 5 and 8? Answer: Gs[4][5,8]... (positive or negative)
Gs = []
for i,d in enumerate(delta):
#X[sel==(i+1)]-=d
G = np.repeat(np.array(sel==(i+1))[:,None]*1,len(X),axis=1) - np.repeat(np.array(sel==(i+1))[None,:]*1,len(X),axis=0)
Gs.append(G)
#does subtracting the two Xs end up positive or negative (if negative we need to flip the sign in G).
w = np.repeat(X,len(X),axis=1) - np.repeat(X.T,len(X),axis=0)
dr_doffsets = []
for i,d in enumerate(delta):
dr_doffset = np.sign(w * Gs[i])
#print "dr_doffset %d" % i
#print dr_doffset
#print Gs[i]
#print w
dr_doffsets.append(dr_doffset)
#lastly we need to divide by the lengthscale: So far we've found d(X_i - X_j)/dOffsets
#we want dr/dOffsets. (X_i - X_j)/lengthscale = r
dr_doffsets /= self.kern.lengthscale
return dr_doffsets
def parameters_changed(self):
offsets = np.hstack([0.0,self.offset.values])[:,None]
self.X = self.X_fixed - offsets[self.selected]
super(GPOffsetRegression, self).parameters_changed()
dL_dr = self.kern.dK_dr_via_X(self.X, self.X) * self.grad_dict['dL_dK']
dr_doff = self.dr_doffset(self.X,self.selected,self.offset.values)
for i in range(len(dr_doff)):
dL_doff = dL_dr * dr_doff[i]
self.offset.gradient[i] = -np.sum(dL_doff)

View file

@ -2,65 +2,56 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.warping_functions import *
#from ..util.warping_functions import *
from ..core import GP
from .. import likelihoods
from GPy.util.warping_functions import TanhWarpingFunction_d
from paramz import ObsAr
#from GPy.util.warping_functions import TanhFunction
from ..util.warping_functions import TanhFunction
from GPy import kern
class WarpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3):
"""
This defines a GP Regression model that applies a
warping function to the output.
"""
def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalizer=False):
if kernel is None:
kernel = kern.RBF(X.shape[1])
if warping_function == None:
self.warping_function = TanhWarpingFunction_d(warping_terms)
self.warping_function = TanhFunction(warping_terms)
self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1) * 1)
else:
self.warping_function = warping_function
self.scale_data = False
if self.scale_data:
Y = self._scale_data(Y)
#self.has_uncertain_inputs = False
self.Y_untransformed = Y.copy()
self.predict_in_warped_space = True
likelihood = likelihoods.Gaussian()
GP.__init__(self, X, self.transform_data(), likelihood=likelihood, kernel=kernel)
super(WarpedGP, self).__init__(X, Y.copy(), likelihood=likelihood, kernel=kernel, normalizer=normalizer)
self.Y_normalized = self.Y_normalized.copy()
self.Y_untransformed = self.Y_normalized.copy()
self.predict_in_warped_space = True
self.link_parameter(self.warping_function)
def _scale_data(self, Y):
self._Ymax = Y.max()
self._Ymin = Y.min()
return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5
def _unscale_data(self, Y):
return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin
def set_XY(self, X=None, Y=None):
super(WarpedGP, self).set_XY(X, Y)
self.Y_untransformed = self.Y_normalized.copy()
self.update_model(True)
def parameters_changed(self):
self.Y[:] = self.transform_data()
"""
Notice that we update the warping function gradients here.
"""
self.Y_normalized[:] = self.transform_data()
super(WarpedGP, self).parameters_changed()
Kiy = self.posterior.woodbury_vector.flatten()
grad_y = self.warping_function.fgrad_y(self.Y_untransformed)
grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed,
return_covar_chain=True)
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
warping_grads = -dquad_dpsi + djac_dpsi
self.warping_function.psi.gradient[:] = warping_grads[:, :-1]
self.warping_function.d.gradient[:] = warping_grads[0, -1]
self.warping_function.update_grads(self.Y_untransformed, Kiy)
def transform_data(self):
Y = self.warping_function.f(self.Y_untransformed.copy()).copy()
return Y
def log_likelihood(self):
"""
Notice we add the jacobian of the warping function here.
"""
ll = GP.log_likelihood(self)
jacobian = self.warping_function.fgrad_y(self.Y_untransformed)
return ll + np.log(jacobian).sum()
@ -73,36 +64,42 @@ class WarpedGP(GP):
arg2 = np.ones(shape=gh_samples.shape).dot(mean.T)
return self.warping_function.f_inv(arg1 + arg2, y=pred_init)
def _get_warped_mean(self, mean, std, pred_init=None, deg_gauss_hermite=100):
def _get_warped_mean(self, mean, std, pred_init=None, deg_gauss_hermite=20):
"""
Calculate the warped mean by using Gauss-Hermite quadrature.
"""
gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite)
gh_samples = gh_samples[:,None]
gh_weights = gh_weights[None,:]
gh_samples = gh_samples[:, None]
gh_weights = gh_weights[None, :]
return gh_weights.dot(self._get_warped_term(mean, std, gh_samples)) / np.sqrt(np.pi)
def _get_warped_variance(self, mean, std, pred_init=None, deg_gauss_hermite=100):
def _get_warped_variance(self, mean, std, pred_init=None, deg_gauss_hermite=20):
"""
Calculate the warped variance by using Gauss-Hermite quadrature.
"""
gh_samples, gh_weights = np.polynomial.hermite.hermgauss(deg_gauss_hermite)
gh_samples = gh_samples[:,None]
gh_weights = gh_weights[None,:]
gh_samples = gh_samples[:, None]
gh_weights = gh_weights[None, :]
arg1 = gh_weights.dot(self._get_warped_term(mean, std, gh_samples,
pred_init=pred_init) ** 2) / np.sqrt(np.pi)
arg2 = self._get_warped_mean(mean, std, pred_init=pred_init,
deg_gauss_hermite=deg_gauss_hermite)
return arg1 - (arg2 ** 2)
def predict(self, Xnew, which_parts='all', pred_init=None, full_cov=False, Y_metadata=None,
median=False, deg_gauss_hermite=100):
# normalize X values
# Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
mu, var = GP._raw_predict(self, Xnew)
def predict(self, Xnew, kern=None, pred_init=None, Y_metadata=None,
median=False, deg_gauss_hermite=20, likelihood=None):
"""
Prediction results depend on:
- The value of the self.predict_in_warped_space flag
- The median flag passed as argument
The likelihood keyword is never used, it is just to follow the plotting API.
"""
#mu, var = GP._raw_predict(self, Xnew)
# now push through likelihood
mean, var = self.likelihood.predictive_values(mu, var)
#mean, var = self.likelihood.predictive_values(mu, var)
mean, var = super(WarpedGP, self).predict(Xnew, kern=kern, full_cov=False, likelihood=likelihood)
if self.predict_in_warped_space:
std = np.sqrt(var)
@ -116,13 +113,9 @@ class WarpedGP(GP):
else:
wmean = mean
wvar = var
if self.scale_data:
pred = self._unscale_data(pred)
return wmean, wvar
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None, likelihood=None, kern=None):
"""
Get the predictive quantiles around the prediction at X
@ -133,19 +126,38 @@ class WarpedGP(GP):
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
#return [a, b]
if not self.predict_in_warped_space:
return [a, b]
#print a.shape
new_a = self.warping_function.f_inv(a)
new_b = self.warping_function.f_inv(b)
qs = super(WarpedGP, self).predict_quantiles(X, quantiles, Y_metadata=Y_metadata, likelihood=likelihood, kern=kern)
if self.predict_in_warped_space:
return [self.warping_function.f_inv(q) for q in qs]
return qs
#m, v = self._raw_predict(X, full_cov=False)
#if self.normalizer is not None:
# m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
#a, b = self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
#if not self.predict_in_warped_space:
# return [a, b]
#new_a = self.warping_function.f_inv(a)
#new_b = self.warping_function.f_inv(b)
#return [new_a, new_b]
return [new_a, new_b]
#return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
"""
Calculation of the log predictive density. Notice we add
the jacobian of the warping function here.
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param x_test: test locations (x_{*})
:type x_test: (Nx1) array
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param Y_metadata: metadata associated with the test points
"""
mu_star, var_star = self._raw_predict(x_test)
fy = self.warping_function.f(y_test)
ll_lpd = self.likelihood.log_predictive_density(fy, mu_star, var_star, Y_metadata=Y_metadata)
return ll_lpd + np.log(self.warping_function.fgrad_y(y_test))
if __name__ == '__main__':

View file

@ -61,6 +61,8 @@ class AbstractPlottingLibrary(object):
"""
Get a new figure with nrows and ncolumns subplots.
Does not initialize the canvases yet.
There is individual kwargs for the individual plotting libraries to use.
"""
raise NotImplementedError("Implement all plot functions in AbstractPlottingLibrary in order to use your own plotting library")

View file

@ -31,6 +31,7 @@
import numpy as np
from scipy import sparse
import itertools
from ...models import WarpedGP
def in_ipynb():
try:
@ -295,6 +296,10 @@ def get_x_y_var(model):
Y = model.Y.values
except AttributeError:
Y = model.Y
if isinstance(model, WarpedGP) and not model.predict_in_warped_space:
Y = model.Y_normalized
if sparse.issparse(Y): Y = Y.todense().view(np.ndarray)
return X, X_variance, Y
@ -381,4 +386,4 @@ def x_frame2D(X,plot_limits=None,resolution=None):
resolution = resolution or 50
xx, yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
Xnew = np.c_[xx.flat, yy.flat]
return Xnew, xx, yy, xmin, xmax
return Xnew, xx, yy, xmin, xmax

View file

@ -91,18 +91,32 @@ class MiscTests(unittest.TestCase):
k = GPy.kern.RBF(1)
m2 = GPy.models.GPRegression(self.X, (Y-mu)/std, kernel=k, normalizer=False)
m2[:] = m[:]
mu1, var1 = m.predict(m.X, full_cov=True)
mu2, var2 = m2.predict(m2.X, full_cov=True)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
np.testing.assert_allclose(var1, var2*std**2)
mu1, var1 = m.predict(m.X, full_cov=False)
mu2, var2 = m2.predict(m2.X, full_cov=False)
np.testing.assert_allclose(mu1, (mu2*std)+mu)
np.testing.assert_allclose(var1, var2)
np.testing.assert_allclose(var1, var2*std**2)
q50n = m.predict_quantiles(m.X, (50,))
q50 = m2.predict_quantiles(m2.X, (50,))
np.testing.assert_allclose(q50n[0], (q50[0]*std)+mu)
# Test variance component:
qs = np.array([2.5, 97.5])
# The quantiles get computed before unormalization
# And transformed using the mean transformation:
c = np.random.choice(self.X.shape[0])
q95 = m2.predict_quantiles(self.X[[c]], qs)
mu, var = m2.predict(self.X[[c]])
from scipy.stats import norm
np.testing.assert_allclose((mu+(norm.ppf(qs/100.)*np.sqrt(var))).flatten(), np.array(q95).flatten())
def check_jacobian(self):
try:
@ -361,51 +375,95 @@ class MiscTests(unittest.TestCase):
preds = m.predict(self.X)
warp_k = GPy.kern.RBF(1)
warp_f = GPy.util.warping_functions.IdentityFunction()
warp_m = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k, warping_function=warp_f)
warp_f = GPy.util.warping_functions.IdentityFunction(closed_inverse=False)
warp_m = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k,
warping_function=warp_f)
warp_m.optimize()
warp_preds = warp_m.predict(self.X)
np.testing.assert_almost_equal(preds, warp_preds)
warp_k_exact = GPy.kern.RBF(1)
warp_f_exact = GPy.util.warping_functions.IdentityFunction()
warp_m_exact = GPy.models.WarpedGP(self.X, self.Y, kernel=warp_k_exact,
warping_function=warp_f_exact)
warp_m_exact.optimize()
warp_preds_exact = warp_m_exact.predict(self.X)
np.testing.assert_almost_equal(preds, warp_preds, decimal=4)
np.testing.assert_almost_equal(preds, warp_preds_exact, decimal=4)
@unittest.skip('Comment this to plot the modified sine function')
def test_warped_gp_sine(self):
def test_warped_gp_log(self):
"""
A test replicating the sine regression problem from
Snelson's paper.
A WarpedGP with the log warping function should be
equal to a standard GP with log labels.
Note that we predict the median here.
"""
k = GPy.kern.RBF(1)
Y = np.abs(self.Y)
logY = np.log(Y)
m = GPy.models.GPRegression(self.X, logY, kernel=k)
m.optimize()
preds = m.predict(self.X)[0]
warp_k = GPy.kern.RBF(1)
warp_f = GPy.util.warping_functions.LogFunction(closed_inverse=False)
warp_m = GPy.models.WarpedGP(self.X, Y, kernel=warp_k,
warping_function=warp_f)
warp_m.optimize()
warp_preds = warp_m.predict(self.X, median=True)[0]
warp_k_exact = GPy.kern.RBF(1)
warp_f_exact = GPy.util.warping_functions.LogFunction()
warp_m_exact = GPy.models.WarpedGP(self.X, Y, kernel=warp_k_exact,
warping_function=warp_f_exact)
warp_m_exact.optimize(messages=True)
warp_preds_exact = warp_m_exact.predict(self.X, median=True)[0]
np.testing.assert_almost_equal(np.exp(preds), warp_preds, decimal=4)
np.testing.assert_almost_equal(np.exp(preds), warp_preds_exact, decimal=4)
def test_warped_gp_cubic_sine(self, max_iters=100):
"""
A test replicating the cubic sine regression problem from
Snelson's paper. This test doesn't have any assertions, it's
just to ensure coverage of the tanh warping function code.
"""
X = (2 * np.pi) * np.random.random(151) - np.pi
Y = np.sin(X) + np.random.normal(0,0.1,151)
Y = np.exp(Y) - 5
#Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y]) + 0
Y = np.sin(X) + np.random.normal(0,0.2,151)
Y = np.array([np.power(abs(y),float(1)/3) * (1,-1)[y<0] for y in Y])
X = X[:, None]
Y = Y[:, None]
#np.seterr(over='raise')
import matplotlib.pyplot as plt
warp_k = GPy.kern.RBF(1)
warp_f = GPy.util.warping_functions.TanhWarpingFunction_d(n_terms=2)
warp_m = GPy.models.WarpedGP(X[:, None], Y[:, None], kernel=warp_k, warping_function=warp_f)
#warp_m['.*variance.*'].constrain_fixed(0.25)
#warp_m['.*lengthscale.*'].constrain_fixed(1)
#warp_m['warp_tanh.d'].constrain_fixed(1)
#warp_m.randomize()
#warp_m['.*warp_tanh.psi*'][:,0:2].constrain_bounded(0,100)
#warp_m['.*warp_tanh.psi*'][:,0:1].constrain_fixed(1)
#print(warp_m.checkgrad())
#warp_m.plot()
#plt.show()
warp_m.optimize_restarts(parallel=True, robust=True)
#print(warp_m.checkgrad())
print(warp_m)
print(warp_m['.*warp.*'])
warp_m = GPy.models.WarpedGP(X, Y)#, kernel=warp_k)#, warping_function=warp_f)
warp_m['.*\.d'].constrain_fixed(1.0)
warp_m.optimize_restarts(parallel=False, robust=False, num_restarts=5,
max_iters=max_iters)
warp_m.predict(X)
warp_m.predict_quantiles(X)
warp_m.log_predictive_density(X, Y)
warp_m.predict_in_warped_space = False
warp_m.plot()
warp_m.predict_in_warped_space = True
warp_m.plot()
warp_f.plot(X.min()-10, X.max()+10)
plt.show()
def test_offset_regression(self):
#Tests GPy.models.GPOffsetRegression. Using two small time series
#from a sine wave, we confirm the algorithm determines that the
#likelihood is maximised when the offset hyperparameter is approximately
#equal to the actual offset in X between the two time series.
offset = 3
X1 = np.arange(0,50,5.0)[:,None]
X2 = np.arange(0+offset,50+offset,5.0)[:,None]
X = np.vstack([X1,X2])
ind = np.vstack([np.zeros([10,1]),np.ones([10,1])])
X = np.hstack([X,ind])
Y = np.sin((X[0:10,0])/30.0)[:,None]
Y = np.vstack([Y,Y])
m = GPy.models.GPOffsetRegression(X,Y)
m.rbf.lengthscale=5.0 #make it something other than one to check our gradients properly!
assert m.checkgrad(), "Gradients of offset parameters don't match numerical approximations."
m.optimize()
assert np.abs(m.offset[0]-offset)<0.1, ("GPOffsetRegression model failing to estimate correct offset (value estimated = %0.2f instead of %0.2f)" % (m.offset[0], offset))
class GradientTests(np.testing.TestCase):

View file

@ -29,6 +29,7 @@
#===============================================================================
import unittest, numpy as np
import GPy
class TestDebug(unittest.TestCase):
def test_checkFinite(self):
@ -107,3 +108,50 @@ class TestDebug(unittest.TestCase):
self.assertTrue(d[tuple(X[:,4])] == d[tuple(X[:,0])] == [0, 4, 5])
self.assertTrue(d[tuple(X[:,1])] == [1])
def test_offset_cluster(self):
#Tests the GPy.util.cluster_with_offset.cluster utility with a small
#test data set. Not using random noise just in case it occasionally
#causes it not to cluster correctly.
#groundtruth cluster identifiers are: [0,1,1,0]
#data contains a list of the four sets of time series (3 per data point)
data = [np.array([[ 2.18094245, 1.96529789, 2.00265523, 2.18218742, 2.06795428],
[ 1.62254829, 1.75748448, 1.83879347, 1.87531326, 1.52503496],
[ 1.54589609, 1.61607914, 2.00463192, 1.48771394, 1.63339218]]),
np.array([[ 2.86766106, 2.97953437, 2.91958876, 2.92510506, 3.03239241],
[ 2.57368423, 2.59954886, 3.10000395, 2.75806125, 2.89865704],
[ 2.58916318, 2.53698259, 2.63858411, 2.63102504, 2.51853901]]),
np.array([[ 2.77834168, 2.9618564 , 2.88482141, 3.24259745, 2.9716821 ],
[ 2.60675576, 2.67095624, 2.94824436, 2.80520631, 2.87247516],
[ 2.49543562, 2.5492281 , 2.6505866 , 2.65015308, 2.59738616]]),
np.array([[ 1.76783086, 2.21666738, 2.07939706, 1.9268263 , 2.23360121],
[ 1.94305547, 1.94648592, 2.1278921 , 2.09481457, 2.08575238],
[ 1.69336013, 1.72285186, 1.6339506 , 1.61212022, 1.39198698]])]
#inputs contains their associated X values
inputs = [np.array([[ 0. ],
[ 0.68040097],
[ 1.20316795],
[ 1.798749 ],
[ 2.14891733]]), np.array([[ 0. ],
[ 0.51910637],
[ 0.98259352],
[ 1.57442965],
[ 1.82515098]]), np.array([[ 0. ],
[ 0.66645478],
[ 1.59464591],
[ 1.69769551],
[ 1.80932752]]), np.array([[ 0. ],
[ 0.87512108],
[ 1.71881079],
[ 2.67162871],
[ 3.23761907]])]
#try doing the clustering
active = GPy.util.cluster_with_offset.cluster(data,inputs)
#check to see that the clustering has correctly clustered the time series.
clusters = set([frozenset(cluster) for cluster in active])
assert set([1,2]) in clusters, "Offset Clustering algorithm failed"
assert set([0,3]) in clusters, "Offset Clustering algoirthm failed"

View file

@ -16,3 +16,4 @@ from . import initialization
from . import multioutput
from . import parallel
from . import functions
from . import cluster_with_offset

View file

@ -0,0 +1,184 @@
# Copyright (c) 2016, Mike Smith
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import GPy
import numpy as np
import sys #so I can print dots
def get_log_likelihood(inputs,data,clust):
"""Get the LL of a combined set of clusters, ignoring time series offsets.
Get the log likelihood of a cluster without worrying about the fact
different time series are offset. We're using it here really for those
cases in which we only have one cluster to get the loglikelihood of.
arguments:
inputs -- the 'X's in a list, one item per cluster
data -- the 'Y's in a list, one item per cluster
clust -- list of clusters to use
returns a tuple:
log likelihood and the offset (which is always zero for this model)
"""
S = data[0].shape[0] #number of time series
#build a new dataset from the clusters, by combining all clusters together
X = np.zeros([0,1])
Y = np.zeros([0,S])
#for each person in the cluster,
#add their inputs and data to the new dataset
for p in clust:
X = np.vstack([X,inputs[p]])
Y = np.vstack([Y,data[p].T])
#find the loglikelihood. We just add together the LL for each time series.
#ll=0
#for s in range(S):
# m = GPy.models.GPRegression(X,Y[:,s][:,None])
# m.optimize()
# ll+=m.log_likelihood()
m = GPy.models.GPRegression(X,Y)
m.optimize()
ll=m.log_likelihood()
return ll,0
def get_log_likelihood_offset(inputs,data,clust):
"""Get the log likelihood of a combined set of clusters, fitting the offsets
arguments:
inputs -- the 'X's in a list, one item per cluster
data -- the 'Y's in a list, one item per cluster
clust -- list of clusters to use
returns a tuple:
log likelihood and the offset
"""
#if we've only got one cluster, the model has an error, so we want to just
#use normal GPRegression.
if len(clust)==1:
return get_log_likelihood(inputs,data,clust)
S = data[0].shape[0] #number of time series
X = np.zeros([0,2]) #notice the extra column, this is for the cluster index
Y = np.zeros([0,S])
#for each person in the cluster, add their inputs and data to the new
#dataset. Note we add an index identifying which person is which data point.
#This is for the offset model to use, to allow it to know which data points
#to shift.
for i,p in enumerate(clust):
idx = i*np.ones([inputs[p].shape[0],1])
X = np.vstack([X,np.hstack([inputs[p],idx])])
Y = np.vstack([Y,data[p].T])
m = GPy.models.GPOffsetRegression(X,Y)
#TODO: How to select a sensible prior?
m.offset.set_prior(GPy.priors.Gaussian(0,20))
#TODO: Set a sensible start value for the length scale,
#make it long to help the offset fit.
m.optimize()
ll = m.log_likelihood()
offset = m.offset.values[0]
return ll,offset
def cluster(data,inputs,verbose=False):
"""Clusters data
Using the new offset model, this method uses a greedy algorithm to cluster
the data. It starts with all the data points in separate clusters and tests
whether combining them increases the overall log-likelihood (LL). It then
iteratively joins pairs of clusters which cause the greatest increase in
the LL, until no join increases the LL.
arguments:
inputs -- the 'X's in a list, one item per cluster
data -- the 'Y's in a list, one item per cluster
returns a list of the clusters.
"""
N=len(data)
#Define a set of N active cluster
active = []
for p in range(0,N):
active.append([p])
loglikes = np.zeros(len(active))
loglikes[:] = None
pairloglikes = np.zeros([len(active),len(active)])
pairloglikes[:] = None
pairoffset = np.zeros([len(active),len(active)])
it = 0
while True:
if verbose:
it +=1
print("Iteration %d" % it)
#Compute the log-likelihood of each cluster (add them together)
for clusti in range(len(active)):
if verbose:
sys.stdout.write('.')
sys.stdout.flush()
if np.isnan(loglikes[clusti]):
loglikes[clusti], unused_offset = get_log_likelihood_offset(inputs,data,[clusti])
#try combining with each other cluster...
for clustj in range(clusti): #count from 0 to clustj-1
temp = [clusti,clustj]
if np.isnan(pairloglikes[clusti,clustj]):
pairloglikes[clusti,clustj],pairoffset[clusti,clustj] = get_log_likelihood_offset(inputs,data,temp)
seploglikes = np.repeat(loglikes[:,None].T,len(loglikes),0)+np.repeat(loglikes[:,None],len(loglikes),1)
loglikeimprovement = pairloglikes - seploglikes #how much likelihood improves with clustering
top = np.unravel_index(np.nanargmax(pairloglikes-seploglikes), pairloglikes.shape)
#if loglikeimprovement.shape[0]<3:
# #no more clustering to do - this shouldn't happen really unless
# #we've set the threshold to apply clustering to less than 0
# break
#if theres further clustering to be done...
if loglikeimprovement[top[0],top[1]]>0:
active[top[0]].extend(active[top[1]])
offset=pairoffset[top[0],top[1]]
inputs[top[0]] = np.vstack([inputs[top[0]],inputs[top[1]]-offset])
data[top[0]] = np.hstack([data[top[0]],data[top[1]]])
del inputs[top[1]]
del data[top[1]]
del active[top[1]]
#None = message to say we need to recalculate
pairloglikes[:,top[0]] = None
pairloglikes[top[0],:] = None
pairloglikes = np.delete(pairloglikes,top[1],0)
pairloglikes = np.delete(pairloglikes,top[1],1)
loglikes[top[0]] = None
loglikes = np.delete(loglikes,top[1])
else:
break
#if loglikeimprovement[top[0],top[1]]>0:
# print "joined"
# print top
# print offset
# print offsets
# print offsets[top[1]]-offsets[top[0]]
#TODO Add a way to return the offsets applied to all the time series
return active
#starttime = time.time()
#active = cluster(data,inputs)
#endtime = time.time()
#print "TOTAL TIME %0.4f" % (endtime-starttime)

View file

@ -6,7 +6,7 @@ Created on Aug 27, 2014
import logging
import numpy as np
class Norm(object):
class _Norm(object):
def __init__(self):
pass
def scale_by(self, Y):
@ -18,7 +18,8 @@ class Norm(object):
"""
Project Y into normalized space
"""
raise NotImplementedError
if not self.scaled():
raise AttributeError("Norm object not initialized yet, try calling scale_by(data) first.")
def inverse_mean(self, X):
"""
Project the normalized object X into space of Y
@ -31,15 +32,46 @@ class Norm(object):
Whether this Norm object has been initialized.
"""
raise NotImplementedError
class MeanNorm(Norm):
class Standardize(_Norm):
def __init__(self):
self.mean = None
def scale_by(self, Y):
Y = np.ma.masked_invalid(Y, copy=False)
self.mean = Y.mean(0).view(np.ndarray)
self.std = Y.std(0).view(np.ndarray)
def normalize(self, Y):
return Y-self.mean
super(Standardize, self).normalize(Y)
return (Y-self.mean)/self.std
def inverse_mean(self, X):
return X+self.mean
return (X*self.std)+self.mean
def inverse_variance(self, var):
return (var*(self.std**2))
def scaled(self):
return self.mean is not None
# Inverse variance to be implemented, disabling for now
# If someone in the future want to implement this,
# we need to implement the inverse variance for
# normalization. This means, we need to know the factor
# for the variance to be multiplied to the variance in
# normalized space. This is easy to compute for standardization
# (see above) but gets tricky here.
# class Normalize(_Norm):
# def __init__(self):
# self.ymin = None
# self.ymax = None
# def scale_by(self, Y):
# Y = np.ma.masked_invalid(Y, copy=False)
# self.ymin = Y.min(0).view(np.ndarray)
# self.ymax = Y.max(0).view(np.ndarray)
# def normalize(self, Y):
# super(Normalize, self).normalize(Y)
# return (Y - self.ymin) / (self.ymax - self.ymin) - .5
# def inverse_mean(self, X):
# return (X + .5) * (self.ymax - self.ymin) + self.ymin
# def inverse_variance(self, var):
#
# return (var*(self.std**2))
# def scaled(self):
# return (self.ymin is not None) and (self.ymax is not None)

View file

@ -4,6 +4,7 @@
import numpy as np
from ..core.parameterization import Parameterized, Param
from paramz.transformations import Logexp
import sys
class WarpingFunction(Parameterized):
@ -14,6 +15,7 @@ class WarpingFunction(Parameterized):
def __init__(self, name):
super(WarpingFunction, self).__init__(name=name)
self.rate = 0.1
def f(self, y, psi):
"""function transformation
@ -29,15 +31,32 @@ class WarpingFunction(Parameterized):
"""gradient of f w.r.t to y"""
raise NotImplementedError
def f_inv(self, z, psi):
"""inverse function transformation"""
raise NotImplementedError
def f_inv(self, z, max_iterations=250, y=None):
"""
Calculate the numerical inverse of f. This should be
overwritten for specific warping functions where the
inverse can be found in closed form.
def _get_param_names(self):
raise NotImplementedError
:param max_iterations: maximum number of N.R. iterations
"""
z = z.copy()
y = np.ones_like(z)
it = 0
update = np.inf
while np.abs(update).sum() > 1e-10 and it < max_iterations:
fy = self.f(y)
fgrady = self.fgrad_y(y)
update = (fy - z) / fgrady
y -= self.rate * update
it += 1
#if it == max_iterations:
# print("WARNING!!! Maximum number of iterations reached in f_inv ")
# print("Sum of roots: %.4f" % np.sum(fy - z))
return y
def plot(self, xmin, xmax):
psi = self.psi
y = np.arange(xmin, xmax, 0.01)
f_y = self.f(y)
from matplotlib import pyplot as plt
@ -49,183 +68,59 @@ class WarpingFunction(Parameterized):
plt.show()
class TanhWarpingFunction(WarpingFunction):
def __init__(self, n_terms=3):
"""n_terms specifies the number of tanh terms to be used"""
self.n_terms = n_terms
self.num_parameters = 3 * self.n_terms
super(TanhWarpingFunction, self).__init__(name='warp_tanh')
def f(self, y, psi):
class TanhFunction(WarpingFunction):
"""
This is the function proposed in Snelson et al.:
A sum of tanh functions with linear trends outside
the range. Notice the term 'd', which scales the
linear trend.
"""
def __init__(self, n_terms=3, initial_y=None):
"""
transform y with f using parameter vector psi
psi = [[a,b,c]]
::math::`f = \\sum_{terms} a * tanh(b*(y+c))`
n_terms specifies the number of tanh terms to be used
"""
#1. check that number of params is consistent
assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
assert psi.shape[1] == 3, 'inconsistent parameter dimensions'
#2. exponentiate the a and b (positive!)
mpsi = psi.copy()
#3. transform data
z = y.copy()
for i in range(len(mpsi)):
a,b,c = mpsi[i]
z += a*np.tanh(b*(y+c))
return z
def f_inv(self, y, psi, iterations=10):
"""
calculate the numerical inverse of f
:param iterations: number of N.R. iterations
"""
y = y.copy()
z = np.ones_like(y)
for i in range(iterations):
z -= (self.f(z, psi) - y)/self.fgrad_y(z,psi)
return z
def fgrad_y(self, y, psi, return_precalc=False):
"""
gradient of f w.r.t to y ([N x 1])
returns: Nx1 vector of derivatives, unless return_precalc is true,
then it also returns the precomputed stuff
"""
mpsi = psi.copy()
# vectorized version
# S = (mpsi[:,1]*(y + mpsi[:,2])).T
S = (mpsi[:,1]*(y[:,:,None] + mpsi[:,2])).T
R = np.tanh(S)
D = 1-R**2
# GRAD = (1+(mpsi[:,0:1]*mpsi[:,1:2]*D).sum(axis=0))[:,np.newaxis]
GRAD = (1+(mpsi[:,0:1][:,:,None]*mpsi[:,1:2][:,:,None]*D).sum(axis=0)).T
if return_precalc:
# return GRAD,S.sum(axis=1),R.sum(axis=1),D.sum(axis=1)
return GRAD, S, R, D
return GRAD
def fgrad_y_psi(self, y, psi, return_covar_chain=False):
"""
gradient of f w.r.t to y and psi
returns: NxIx3 tensor of partial derivatives
"""
# 1. exponentiate the a and b (positive!)
mpsi = psi.copy()
w, s, r, d = self.fgrad_y(y, psi, return_precalc = True)
gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 3))
for i in range(len(mpsi)):
a,b,c = mpsi[i]
gradients[:,:,i,0] = (b*(1.0/np.cosh(s[i]))**2).T
gradients[:,:,i,1] = a*(d[i] - 2.0*s[i]*r[i]*(1.0/np.cosh(s[i]))**2).T
gradients[:,:,i,2] = (-2.0*a*(b**2)*r[i]*((1.0/np.cosh(s[i]))**2)).T
if return_covar_chain:
covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 3))
for i in range(len(mpsi)):
a,b,c = mpsi[i]
covar_grad_chain[:, :, i, 0] = (r[i]).T
covar_grad_chain[:, :, i, 1] = (a*(y + c) * ((1.0/np.cosh(s[i]))**2).T)
covar_grad_chain[:, :, i, 2] = a*b*((1.0/np.cosh(s[i]))**2).T
return gradients, covar_grad_chain
return gradients
def _get_param_names(self):
variables = ['a', 'b', 'c']
names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[])
return names
class TanhWarpingFunction_d(WarpingFunction):
def __init__(self, n_terms=3):
"""n_terms specifies the number of tanh terms to be used"""
self.n_terms = n_terms
self.num_parameters = 3 * self.n_terms + 1
self.psi = np.ones((self.n_terms, 3))
super(TanhWarpingFunction_d, self).__init__(name='warp_tanh')
super(TanhFunction, self).__init__(name='warp_tanh')
self.psi = Param('psi', self.psi)
self.psi[:, :2].constrain_positive()
self.d = Param('%s' % ('d'), 1.0, Logexp())
self.link_parameter(self.psi)
self.link_parameter(self.d)
self.initial_y = initial_y
def f(self, y):
"""
Transform y with f using parameter vector psi
psi = [[a,b,c]]
:math:`f = \\sum_{terms} a * tanh(b*(y+c))`
:math:`f = (y * d) + \\sum_{terms} a * tanh(b *(y + c))`
"""
#1. check that number of params is consistent
# assert psi.shape[0] == self.n_terms, 'inconsistent parameter dimensions'
# assert psi.shape[1] == 4, 'inconsistent parameter dimensions'
d = self.d
mpsi = self.psi
#3. transform data
z = d*y.copy()
z = d * y.copy()
for i in range(len(mpsi)):
a,b,c = mpsi[i]
z += a*np.tanh(b*(y+c))
a, b, c = mpsi[i]
z += a * np.tanh(b * (y + c))
return z
def f_inv(self, z, max_iterations=1000, y=None):
"""
calculate the numerical inverse of f
:param max_iterations: maximum number of N.R. iterations
"""
z = z.copy()
if y is None:
y = np.ones_like(z)
it = 0
update = np.inf
while it == 0 or (np.abs(update).sum() > 1e-10 and it < max_iterations):
fy = self.f(y)
fgrady = self.fgrad_y(y)
update = (fy - z)/fgrady
y -= update
it += 1
if it == max_iterations:
print("WARNING!!! Maximum number of iterations reached in f_inv ")
return y
def fgrad_y(self, y, return_precalc=False):
"""
gradient of f w.r.t to y ([N x 1])
:returns: Nx1 vector of derivatives, unless return_precalc is true, then it also returns the precomputed stuff
:returns: Nx1 vector of derivatives, unless return_precalc is true,
then it also returns the precomputed stuff
"""
d = self.d
mpsi = self.psi
# vectorized version
S = (mpsi[:,1]*(y[:,:,None] + mpsi[:,2])).T
S = (mpsi[:,1] * (y[:,:,None] + mpsi[:,2])).T
R = np.tanh(S)
D = 1-R**2
D = 1 - (R ** 2)
GRAD = (d + (mpsi[:,0:1][:,:,None]*mpsi[:,1:2][:,:,None]*D).sum(axis=0)).T
GRAD = (d + (mpsi[:,0:1][:,:,None] * mpsi[:,1:2][:,:,None] * D).sum(axis=0)).T
if return_precalc:
return GRAD, S, R, D
@ -244,45 +139,79 @@ class TanhWarpingFunction_d(WarpingFunction):
gradients = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
for i in range(len(mpsi)):
a,b,c = mpsi[i]
gradients[:,:,i,0] = (b*(1.0/np.cosh(s[i]))**2).T
gradients[:,:,i,1] = a*(d[i] - 2.0*s[i]*r[i]*(1.0/np.cosh(s[i]))**2).T
gradients[:,:,i,2] = (-2.0*a*(b**2)*r[i]*((1.0/np.cosh(s[i]))**2)).T
gradients[:,:,0,3] = 1.0
gradients[:, :, i, 0] = (b * (1.0/np.cosh(s[i])) ** 2).T
gradients[:, :, i, 1] = a * (d[i] - 2.0 * s[i] * r[i] * (1.0/np.cosh(s[i])) ** 2).T
gradients[:, :, i, 2] = (-2.0 * a * (b ** 2) * r[i] * ((1.0 / np.cosh(s[i])) ** 2)).T
gradients[:, :, 0, 3] = 1.0
if return_covar_chain:
covar_grad_chain = np.zeros((y.shape[0], y.shape[1], len(mpsi), 4))
for i in range(len(mpsi)):
a,b,c = mpsi[i]
covar_grad_chain[:, :, i, 0] = (r[i]).T
covar_grad_chain[:, :, i, 1] = (a*(y + c) * ((1.0/np.cosh(s[i]))**2).T)
covar_grad_chain[:, :, i, 2] = a*b*((1.0/np.cosh(s[i]))**2).T
covar_grad_chain[:, :, i, 1] = (a * (y + c) * ((1.0 / np.cosh(s[i])) ** 2).T)
covar_grad_chain[:, :, i, 2] = a * b * ((1.0 / np.cosh(s[i])) ** 2).T
covar_grad_chain[:, :, 0, 3] = y
return gradients, covar_grad_chain
return gradients
def _get_param_names(self):
variables = ['a', 'b', 'c', 'd']
names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[])
names.append('warp_tanh_d')
return names
def update_grads(self, Y_untransformed, Kiy):
grad_y = self.fgrad_y(Y_untransformed)
grad_y_psi, grad_psi = self.fgrad_y_psi(Y_untransformed,
return_covar_chain=True)
djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0)
dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0)
warping_grads = -dquad_dpsi + djac_dpsi
self.psi.gradient[:] = warping_grads[:, :-1]
self.d.gradient[:] = warping_grads[0, -1]
class LogFunction(WarpingFunction):
"""
Easy wrapper for applying a fixed log warping function to
positive-only values.
The closed_inverse flag should only be set to False for
debugging and testing purposes.
"""
def __init__(self, closed_inverse=True):
self.num_parameters = 0
super(LogFunction, self).__init__(name='log')
if closed_inverse:
self.f_inv = self._f_inv
def f(self, y):
return np.log(y)
def fgrad_y(self, y):
return 1. / y
def update_grads(self, Y_untransformed, Kiy):
pass
def fgrad_y_psi(self, y, return_covar_chain=False):
if return_covar_chain:
return 0, 0
return 0
def _f_inv(self, z, y=None):
return np.exp(z)
class IdentityFunction(WarpingFunction):
"""
Identity warping function. This is for testing and sanity check purposes
and should not be used in practice.
The closed_inverse flag should only be set to False for
debugging and testing purposes.
"""
def __init__(self):
self.num_parameters = 4
self.psi = Param('psi', np.zeros((1,3)))
self.d = Param('%s' % ('d'), 1.0, Logexp())
def __init__(self, closed_inverse=True):
self.num_parameters = 0
super(IdentityFunction, self).__init__(name='identity')
self.link_parameter(self.psi)
self.link_parameter(self.d)
if closed_inverse:
self.f_inv = self._f_inv
def f(self, y):
return y
@ -290,11 +219,14 @@ class IdentityFunction(WarpingFunction):
def fgrad_y(self, y):
return np.ones(y.shape)
def update_grads(self, Y_untransformed, Kiy):
pass
def fgrad_y_psi(self, y, return_covar_chain=False):
gradients = np.zeros((y.shape[0], y.shape[1], len(self.psi), 4))
if return_covar_chain:
return gradients, gradients
return gradients
return 0, 0
return 0
def f_inv(self, z, y=None):
def _f_inv(self, z, y=None):
return z

View file

@ -9,7 +9,7 @@ The Gaussian processes framework in Python.
* Travis-CI [unit-tests](https://travis-ci.org/SheffieldML/GPy)
* [![licence](https://img.shields.io/badge/licence-BSD-blue.svg)](http://opensource.org/licenses/BSD-3-Clause)
[![develstat](https://travis-ci.org/SheffieldML/GPy.svg?branch=devel)](https://travis-ci.org/SheffieldML/GPy) [![appveyor](https://ci.appveyor.com/api/projects/status/662o6tha09m2jix3/branch/deploy?svg=true)](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [![coverallsdevel](https://coveralls.io/repos/github/SheffieldML/GPy/badge.svg?branch=devel)](https://coveralls.io/github/SheffieldML/GPy?branch=devel) [![covdevel](http://codecov.io/github/SheffieldML/GPy/coverage.svg?branch=devel)](http://codecov.io/github/SheffieldML/GPy?branch=devel) [![Research software impact](http://depsy.org/api/package/pypi/GPy/badge.svg)](http://depsy.org/package/python/GPy) [![Code Health](https://landscape.io/github/SheffieldML/GPy/devel/landscape.svg?style=flat)](https://landscape.io/github/SheffieldML/GPy/devel)
[![deploystat](https://travis-ci.org/SheffieldML/GPy.svg?branch=deploy)](https://travis-ci.org/SheffieldML/GPy) [![appveyor](https://ci.appveyor.com/api/projects/status/662o6tha09m2jix3/branch/deploy?svg=true)](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [![coverallsdevel](https://coveralls.io/repos/github/SheffieldML/GPy/badge.svg?branch=devel)](https://coveralls.io/github/SheffieldML/GPy?branch=devel) [![covdevel](http://codecov.io/github/SheffieldML/GPy/coverage.svg?branch=devel)](http://codecov.io/github/SheffieldML/GPy?branch=devel) [![Research software impact](http://depsy.org/api/package/pypi/GPy/badge.svg)](http://depsy.org/package/python/GPy) [![Code Health](https://landscape.io/github/SheffieldML/GPy/devel/landscape.svg?style=flat)](https://landscape.io/github/SheffieldML/GPy/devel)
## Updated Structure

View file

@ -3,7 +3,7 @@ environment:
secure: 8/ZjXFwtd1S7ixd7PJOpptupKKEDhm2da/q3unabJ00=
COVERALLS_REPO_TOKEN:
secure: d3Luic/ESkGaWnZrvWZTKrzO+xaVwJWaRCEP0F+K/9DQGPSRZsJ/Du5g3s4XF+tS
gpy_version: 1.2.1
gpy_version: 1.4.0
matrix:
- PYTHON_VERSION: 2.7
MINICONDA: C:\Miniconda-x64
@ -71,10 +71,10 @@ deploy_script:
- echo password:%pip_access% >> %USERPROFILE%\\.pypirc
- ps: >-
if ($env:APPVEYOR_REPO_BRANCH -eq 'devel') {
twine upload -r test dist/*
twine upload --skip-existing -r test dist/*
}
elseif ($env:APPVEYOR_REPO_BRANCH -eq 'deploy') {
twine upload dist/*
twine upload --skip-existing dist/*
}
else {
echo not deploying on other branches

View file

@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.2.1
current_version = 1.4.0
tag = False
commit = True

View file

@ -148,7 +148,7 @@ setup(name = 'GPy',
include_package_data = True,
py_modules = ['GPy.__init__'],
test_suite = 'GPy.testing',
install_requires = ['numpy>=1.7', 'scipy>=0.16', 'six', 'paramz>=0.5.2'],
install_requires = ['numpy>=1.7', 'scipy>=0.16', 'six', 'paramz>=0.6.6'],
extras_require = {'docs':['sphinx'],
'optional':['mpi4py',
'ipython>=4.0.0',