mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-02 14:45:15 +02:00
merge completed
This commit is contained in:
commit
57b4aaab6e
17 changed files with 825 additions and 623 deletions
|
|
@ -16,7 +16,7 @@ class GPBase(Model):
|
|||
def __init__(self, X, likelihood, kernel, normalize_X=False):
|
||||
if len(X.shape)==1:
|
||||
X = X.reshape(-1,1)
|
||||
warning.warn("One dimension output (N,) being reshaped to (N,1)")
|
||||
warnings.warn("One dimension output (N,) being reshaped to (N,1)")
|
||||
self.X = X
|
||||
assert len(self.X.shape) == 2, "too many dimensions for X input"
|
||||
self.num_data, self.input_dim = self.X.shape
|
||||
|
|
@ -76,7 +76,7 @@ class GPBase(Model):
|
|||
:type noise_model: integer.
|
||||
:returns: Ysim: set of simulations, a Numpy array (N x samples).
|
||||
"""
|
||||
Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=True)
|
||||
Ysim = self.posterior_samples_f(X, size, which_parts=which_parts)
|
||||
if isinstance(self.likelihood,Gaussian):
|
||||
noise_std = np.sqrt(self.likelihood._get_params())
|
||||
Ysim += np.random.normal(0,noise_std,Ysim.shape)
|
||||
|
|
@ -176,8 +176,8 @@ class GPBase(Model):
|
|||
upper = m + 2*np.sqrt(v)
|
||||
Y = self.likelihood.Y
|
||||
else:
|
||||
m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts,sampling=False) #Compute the exact mean
|
||||
m_, v_, lower, upper = self.predict(Xgrid, which_parts=which_parts,sampling=True,num_samples=15000) #Apporximate the percentiles
|
||||
m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts, sampling=False) #Compute the exact mean
|
||||
m_, v_, lower, upper = self.predict(Xgrid, which_parts=which_parts, sampling=True, num_samples=15000) #Apporximate the percentiles
|
||||
Y = self.likelihood.data
|
||||
for d in which_data_ycols:
|
||||
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
|
||||
|
|
@ -185,7 +185,7 @@ class GPBase(Model):
|
|||
|
||||
#optionally plot some samples
|
||||
if samples: #NOTE not tested with fixed_inputs
|
||||
Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts, full_cov=True)
|
||||
Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts)
|
||||
for yi in Ysim.T:
|
||||
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
|
||||
|
|
|
|||
|
|
@ -31,14 +31,12 @@ class SVIGP(GPBase):
|
|||
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, X, likelihood, kernel, Z, q_u=None, batchsize=10, X_variance=None):
|
||||
GPBase.__init__(self, X, likelihood, kernel, normalize_X=False)
|
||||
self.batchsize=batchsize
|
||||
self.Y = self.likelihood.Y.copy()
|
||||
self.Z = Z
|
||||
self.num_inducing = Z.shape[0]
|
||||
|
||||
self.batchcounter = 0
|
||||
self.epochs = 0
|
||||
self.iterations = 0
|
||||
|
|
@ -309,7 +307,7 @@ class SVIGP(GPBase):
|
|||
self._param_trace.append(self._get_params())
|
||||
self._ll_trace.append(self.log_likelihood() + self.log_prior())
|
||||
|
||||
#load a batch
|
||||
#load a batch and do the appropriate computations (kernel matrices, etc)
|
||||
self.load_batch()
|
||||
|
||||
#compute the (stochastic) gradient
|
||||
|
|
@ -319,7 +317,8 @@ class SVIGP(GPBase):
|
|||
|
||||
#compute the steps in all parameters
|
||||
vb_step = self.vb_steplength*natgrads[0]
|
||||
if (self.epochs>=1):#only move the parameters after the first epoch
|
||||
#only move the parameters after the first epoch and only if the steplength is nonzero
|
||||
if (self.epochs>=1) and (self.param_steplength > 0):
|
||||
param_step = self.momentum*param_step + self.param_steplength*grads
|
||||
else:
|
||||
param_step = 0.
|
||||
|
|
@ -341,6 +340,8 @@ class SVIGP(GPBase):
|
|||
|
||||
if self.epochs > 10:
|
||||
self._adapt_steplength()
|
||||
self._vb_steplength_trace.append(self.vb_steplength)
|
||||
self._param_steplength_trace.append(self.param_steplength)
|
||||
|
||||
self.iterations += 1
|
||||
|
||||
|
|
@ -349,17 +350,20 @@ class SVIGP(GPBase):
|
|||
if self.adapt_vb_steplength:
|
||||
# self._adaptive_vb_steplength()
|
||||
self._adaptive_vb_steplength_KL()
|
||||
self._vb_steplength_trace.append(self.vb_steplength)
|
||||
assert self.vb_steplength > 0
|
||||
#self._vb_steplength_trace.append(self.vb_steplength)
|
||||
assert self.vb_steplength >= 0
|
||||
|
||||
if self.adapt_param_steplength:
|
||||
self._adaptive_param_steplength()
|
||||
# self._adaptive_param_steplength_log()
|
||||
# self._adaptive_param_steplength_from_vb()
|
||||
self._param_steplength_trace.append(self.param_steplength)
|
||||
#self._param_steplength_trace.append(self.param_steplength)
|
||||
|
||||
def _adaptive_param_steplength(self):
|
||||
decr_factor = 0.02
|
||||
if hasattr(self, 'adapt_param_steplength_decr'):
|
||||
decr_factor = self.adapt_param_steplength_decr
|
||||
else:
|
||||
decr_factor = 0.02
|
||||
g_tp = self._transform_gradients(self._log_likelihood_gradients())
|
||||
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
|
||||
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
|
||||
|
|
@ -433,7 +437,7 @@ class SVIGP(GPBase):
|
|||
else:
|
||||
return mu, diag_var[:,None]
|
||||
|
||||
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
|
||||
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False, sampling=False, num_samples=15000):
|
||||
# normalize X values
|
||||
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
|
||||
if X_variance_new is not None:
|
||||
|
|
@ -443,7 +447,7 @@ class SVIGP(GPBase):
|
|||
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
|
||||
|
||||
# now push through likelihood
|
||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
|
||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, sampling=sampling, num_samples=num_samples)
|
||||
|
||||
return mean, var, _025pm, _975pm
|
||||
|
||||
|
|
|
|||
|
|
@ -6,12 +6,11 @@
|
|||
Gaussian Processes classification
|
||||
"""
|
||||
import pylab as pb
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
default_seed = 10000
|
||||
|
||||
def oil(num_inducing=50, max_iters=100, kernel=None):
|
||||
def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
|
||||
"""
|
||||
Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
|
||||
|
||||
|
|
@ -25,7 +24,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
|
|||
Ytest[Ytest.flatten()==-1] = 0
|
||||
|
||||
# Create GP model
|
||||
m = GPy.models.SparseGPClassification(X, Y,kernel=kernel,num_inducing=num_inducing)
|
||||
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing)
|
||||
|
||||
# Contrain all parameters to be positive
|
||||
m.tie_params('.*len')
|
||||
|
|
@ -33,15 +32,16 @@ def oil(num_inducing=50, max_iters=100, kernel=None):
|
|||
m.update_likelihood_approximation()
|
||||
|
||||
# Optimize
|
||||
m.optimize(max_iters=max_iters)
|
||||
if optimize:
|
||||
m.optimize(max_iters=max_iters)
|
||||
print(m)
|
||||
|
||||
#Test
|
||||
probs = m.predict(Xtest)[0]
|
||||
GPy.util.classification.conf_matrix(probs,Ytest)
|
||||
GPy.util.classification.conf_matrix(probs, Ytest)
|
||||
return m
|
||||
|
||||
def toy_linear_1d_classification(seed=default_seed):
|
||||
def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
|
||||
"""
|
||||
Simple 1D classification example using EP approximation
|
||||
|
||||
|
|
@ -58,21 +58,23 @@ def toy_linear_1d_classification(seed=default_seed):
|
|||
m = GPy.models.GPClassification(data['X'], Y)
|
||||
|
||||
# Optimize
|
||||
#m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
#m.optimize()
|
||||
#m.update_likelihood_approximation()
|
||||
m.pseudo_EM()
|
||||
if optimize:
|
||||
#m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
#m.optimize()
|
||||
#m.update_likelihood_approximation()
|
||||
m.pseudo_EM()
|
||||
|
||||
# Plot
|
||||
fig, axes = pb.subplots(2,1)
|
||||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
print(m)
|
||||
if plot:
|
||||
fig, axes = pb.subplots(2, 1)
|
||||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
|
||||
print m
|
||||
return m
|
||||
|
||||
def toy_linear_1d_classification_laplace(seed=default_seed):
|
||||
def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True):
|
||||
"""
|
||||
Simple 1D classification example using Laplace approximation
|
||||
|
||||
|
|
@ -90,24 +92,25 @@ def toy_linear_1d_classification_laplace(seed=default_seed):
|
|||
|
||||
# Model definition
|
||||
m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood)
|
||||
|
||||
print m
|
||||
|
||||
# Optimize
|
||||
#m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
m.optimize('bfgs', messages=1)
|
||||
#m.pseudo_EM()
|
||||
if optimize:
|
||||
#m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
m.optimize('bfgs', messages=1)
|
||||
#m.pseudo_EM()
|
||||
|
||||
# Plot
|
||||
fig, axes = pb.subplots(2,1)
|
||||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
print(m)
|
||||
if plot:
|
||||
fig, axes = pb.subplots(2, 1)
|
||||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
|
||||
print m
|
||||
return m
|
||||
|
||||
|
||||
def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
|
||||
def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True):
|
||||
"""
|
||||
Sparse 1D classification example
|
||||
|
||||
|
|
@ -121,24 +124,26 @@ def sparse_toy_linear_1d_classification(num_inducing=10,seed=default_seed):
|
|||
Y[Y.flatten() == -1] = 0
|
||||
|
||||
# Model definition
|
||||
m = GPy.models.SparseGPClassification(data['X'], Y,num_inducing=num_inducing)
|
||||
m['.*len']= 4.
|
||||
m = GPy.models.SparseGPClassification(data['X'], Y, num_inducing=num_inducing)
|
||||
m['.*len'] = 4.
|
||||
|
||||
# Optimize
|
||||
#m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
#m.optimize()
|
||||
m.pseudo_EM()
|
||||
if optimize:
|
||||
#m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
#m.optimize()
|
||||
m.pseudo_EM()
|
||||
|
||||
# Plot
|
||||
fig, axes = pb.subplots(2,1)
|
||||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
print(m)
|
||||
if plot:
|
||||
fig, axes = pb.subplots(2, 1)
|
||||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
|
||||
print m
|
||||
return m
|
||||
|
||||
def toy_heaviside(seed=default_seed):
|
||||
def toy_heaviside(seed=default_seed, optimize=True, plot=True):
|
||||
"""
|
||||
Simple 1D classification example using a heavy side gp transformation
|
||||
|
||||
|
|
@ -153,24 +158,26 @@ def toy_heaviside(seed=default_seed):
|
|||
|
||||
# Model definition
|
||||
noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
|
||||
likelihood = GPy.likelihoods.EP(Y,noise_model)
|
||||
likelihood = GPy.likelihoods.EP(Y, noise_model)
|
||||
m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
|
||||
|
||||
# Optimize
|
||||
m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
m.optimize()
|
||||
#m.pseudo_EM()
|
||||
if optimize:
|
||||
m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
m.optimize()
|
||||
#m.pseudo_EM()
|
||||
|
||||
# Plot
|
||||
fig, axes = pb.subplots(2,1)
|
||||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
print(m)
|
||||
if plot:
|
||||
fig, axes = pb.subplots(2, 1)
|
||||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
|
||||
print m
|
||||
return m
|
||||
|
||||
def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None):
|
||||
def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):
|
||||
"""
|
||||
Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
|
||||
|
||||
|
|
@ -187,7 +194,7 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
|
|||
Y[Y.flatten()==-1] = 0
|
||||
|
||||
if model_type == 'Full':
|
||||
m = GPy.models.GPClassification(data['X'], Y,kernel=kernel)
|
||||
m = GPy.models.GPClassification(data['X'], Y, kernel=kernel)
|
||||
|
||||
elif model_type == 'DTC':
|
||||
m = GPy.models.SparseGPClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
|
||||
|
|
@ -197,8 +204,11 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
|
|||
m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
|
||||
m['.*len'] = 3.
|
||||
|
||||
m.pseudo_EM()
|
||||
print(m)
|
||||
m.plot()
|
||||
if optimize:
|
||||
m.pseudo_EM()
|
||||
|
||||
if plot:
|
||||
m.plot()
|
||||
|
||||
print m
|
||||
return m
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
import numpy as _np
|
||||
default_seed = _np.random.seed(123344)
|
||||
|
||||
def bgplvm_test_model(seed=default_seed, optimize=0, verbose=1, plot=0):
|
||||
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
|
||||
"""
|
||||
model for testing purposes. Samples from a GP with rbf kernel and learns
|
||||
the samples with a new kernel. Normally not for optimization, just model cheking
|
||||
|
|
@ -36,55 +36,6 @@ def bgplvm_test_model(seed=default_seed, optimize=0, verbose=1, plot=0):
|
|||
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
|
||||
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
|
||||
|
||||
m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
|
||||
m.lengthscales = lengthscales
|
||||
|
||||
if plot:
|
||||
import matplotlib.pyplot as pb
|
||||
m.plot()
|
||||
pb.title('PCA initialisation')
|
||||
|
||||
if optimize:
|
||||
m.optimize('scg', messages=verbose)
|
||||
if plot:
|
||||
m.plot()
|
||||
pb.title('After optimisation')
|
||||
|
||||
return m
|
||||
|
||||
def bgplvm_test_model_missing_data(seed=default_seed, optimize=0, verbose=1, plot=0):
|
||||
"""
|
||||
model for testing purposes. Samples from a GP with rbf kernel and learns
|
||||
the samples with a new kernel. Normally not for optimization, just model cheking
|
||||
"""
|
||||
from GPy.likelihoods.gaussian import Gaussian
|
||||
import GPy, numpy as np
|
||||
|
||||
num_inputs = 13
|
||||
num_inducing = 5
|
||||
if plot:
|
||||
output_dim = 1
|
||||
input_dim = 2
|
||||
else:
|
||||
input_dim = 2
|
||||
output_dim = 25
|
||||
|
||||
# generate GPLVM-like data
|
||||
X = _np.random.rand(num_inputs, input_dim)
|
||||
lengthscales = _np.random.rand(input_dim)
|
||||
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
|
||||
+ GPy.kern.white(input_dim, 0.01))
|
||||
K = k.K(X)
|
||||
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T
|
||||
lik = Gaussian(Y, normalize=True)
|
||||
|
||||
k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
|
||||
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
|
||||
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
|
||||
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
|
||||
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
|
||||
|
||||
m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
|
||||
#===========================================================================
|
||||
# randomly obstruct data with percentage p
|
||||
|
|
@ -113,7 +64,7 @@ def bgplvm_test_model_missing_data(seed=default_seed, optimize=0, verbose=1, plo
|
|||
|
||||
return m, m2
|
||||
|
||||
def gplvm_oil_100(optimize=1, verbose=1, plot=1):
|
||||
def gplvm_oil_100(optimize=True, verbose=1, plot=True):
|
||||
import GPy
|
||||
data = GPy.util.datasets.oil_100()
|
||||
Y = data['X']
|
||||
|
|
@ -125,7 +76,7 @@ def gplvm_oil_100(optimize=1, verbose=1, plot=1):
|
|||
if plot: m.plot_latent(labels=m.data_labels)
|
||||
return m
|
||||
|
||||
def sparse_gplvm_oil(optimize=1, verbose=0, plot=1, N=100, Q=6, num_inducing=15, max_iters=50):
|
||||
def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_inducing=15, max_iters=50):
|
||||
import GPy
|
||||
_np.random.seed(0)
|
||||
data = GPy.util.datasets.oil()
|
||||
|
|
@ -143,7 +94,7 @@ def sparse_gplvm_oil(optimize=1, verbose=0, plot=1, N=100, Q=6, num_inducing=15,
|
|||
m.kern.plot_ARD()
|
||||
return m
|
||||
|
||||
def swiss_roll(optimize=1, verbose=1, plot=1, N=1000, num_inducing=15, Q=4, sigma=.2):
|
||||
def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4, sigma=.2):
|
||||
import GPy
|
||||
from GPy.util.datasets import swiss_roll_generated
|
||||
from GPy.models import BayesianGPLVM
|
||||
|
|
@ -201,7 +152,7 @@ def swiss_roll(optimize=1, verbose=1, plot=1, N=1000, num_inducing=15, Q=4, sigm
|
|||
|
||||
return m
|
||||
|
||||
def bgplvm_oil(optimize=1, verbose=1, plot=1, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
|
||||
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
|
||||
import GPy
|
||||
from GPy.likelihoods import Gaussian
|
||||
from matplotlib import pyplot as plt
|
||||
|
|
@ -266,7 +217,8 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
|
|||
Ylist = [Y1, Y2, Y3]
|
||||
|
||||
if plot_sim:
|
||||
import pylab, matplotlib.cm as cm
|
||||
import pylab
|
||||
import matplotlib.cm as cm
|
||||
import itertools
|
||||
fig = pylab.figure("MRD Simulation Data", figsize=(8, 6))
|
||||
fig.clf()
|
||||
|
|
@ -277,7 +229,7 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
|
|||
ax.legend()
|
||||
for i, Y in enumerate(Ylist):
|
||||
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
|
||||
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
|
||||
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
|
||||
ax.set_title("Y{}".format(i + 1))
|
||||
pylab.draw()
|
||||
pylab.tight_layout()
|
||||
|
|
@ -302,8 +254,8 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
|
|||
# m['linear_variance'] = .01
|
||||
# return m
|
||||
|
||||
def bgplvm_simulation(optimize=1, verbose=1,
|
||||
plot=1, plot_sim=False,
|
||||
def bgplvm_simulation(optimize=True, verbose=1,
|
||||
plot=True, plot_sim=False,
|
||||
max_iters=2e4,
|
||||
):
|
||||
from GPy import kern
|
||||
|
|
|
|||
|
|
@ -1,296 +0,0 @@
|
|||
import GPy
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from GPy.util import datasets
|
||||
#np.random.seed(1)
|
||||
|
||||
def student_t_approx():
|
||||
"""
|
||||
Example of regressing with a student t likelihood
|
||||
"""
|
||||
real_std = 0.1
|
||||
#Start a function, any function
|
||||
X = np.linspace(0.0, np.pi*2, 100)[:, None]
|
||||
Y = np.sin(X) + np.random.randn(*X.shape)*real_std
|
||||
Yc = Y.copy()
|
||||
|
||||
X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
|
||||
Y_full = np.sin(X_full)
|
||||
|
||||
Y = Y/Y.max()
|
||||
|
||||
#Slightly noisy data
|
||||
Yc[75:80] += 1
|
||||
|
||||
#Very noisy data
|
||||
#Yc[10] += 100
|
||||
#Yc[25] += 10
|
||||
#Yc[23] += 10
|
||||
#Yc[26] += 1000
|
||||
#Yc[24] += 10
|
||||
#Yc = Yc/Yc.max()
|
||||
|
||||
#Add student t random noise to datapoints
|
||||
deg_free = 5
|
||||
print "Real noise: ", real_std
|
||||
initial_var_guess = 0.5
|
||||
|
||||
#t_rv = t(deg_free, loc=0, scale=real_var)
|
||||
#noise = t_rvrvs(size=Y.shape)
|
||||
#Y += noise
|
||||
|
||||
plt.figure(1)
|
||||
plt.suptitle('Gaussian likelihood')
|
||||
# Kernel object
|
||||
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
kernel2 = kernel1.copy()
|
||||
kernel3 = kernel1.copy()
|
||||
kernel4 = kernel1.copy()
|
||||
kernel5 = kernel1.copy()
|
||||
kernel6 = kernel1.copy()
|
||||
|
||||
print "Clean Gaussian"
|
||||
#A GP should completely break down due to the points as they get a lot of weight
|
||||
# create simple GP model
|
||||
m = GPy.models.GPRegression(X, Y, kernel=kernel1)
|
||||
# optimize
|
||||
m.ensure_default_constraints()
|
||||
m.constrain_fixed('white', 1e-4)
|
||||
m.randomize()
|
||||
m.optimize()
|
||||
# plot
|
||||
ax = plt.subplot(211)
|
||||
m.plot(ax=ax)
|
||||
plt.plot(X_full, Y_full)
|
||||
plt.ylim(-1.5, 1.5)
|
||||
plt.title('Gaussian clean')
|
||||
print m
|
||||
|
||||
#Corrupt
|
||||
print "Corrupt Gaussian"
|
||||
m = GPy.models.GPRegression(X, Yc, kernel=kernel2)
|
||||
m.ensure_default_constraints()
|
||||
m.constrain_fixed('white', 1e-4)
|
||||
m.randomize()
|
||||
m.optimize()
|
||||
ax = plt.subplot(212)
|
||||
m.plot(ax=ax)
|
||||
plt.plot(X_full, Y_full)
|
||||
plt.ylim(-1.5, 1.5)
|
||||
plt.title('Gaussian corrupt')
|
||||
print m
|
||||
|
||||
plt.figure(2)
|
||||
plt.suptitle('Student-t likelihood')
|
||||
edited_real_sd = initial_var_guess
|
||||
|
||||
print "Clean student t, rasm"
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
|
||||
m = GPy.models.GPRegression(X, Y.copy(), kernel6, likelihood=stu_t_likelihood)
|
||||
m.ensure_default_constraints()
|
||||
m.constrain_positive('t_noise')
|
||||
m.constrain_fixed('white', 1e-4)
|
||||
m.randomize()
|
||||
#m.update_likelihood_approximation()
|
||||
m.optimize()
|
||||
print(m)
|
||||
ax = plt.subplot(211)
|
||||
m.plot(ax=ax)
|
||||
plt.plot(X_full, Y_full)
|
||||
plt.ylim(-1.5, 1.5)
|
||||
plt.title('Student-t rasm clean')
|
||||
|
||||
print "Corrupt student t, rasm"
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
|
||||
m = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
|
||||
m.ensure_default_constraints()
|
||||
m.constrain_bounded('t_noise', 1e-6, 10.)
|
||||
m.constrain_fixed('white', 1e-4)
|
||||
m.randomize()
|
||||
for a in range(1):
|
||||
m.randomize()
|
||||
m_start = m.copy()
|
||||
print m
|
||||
m.optimize('scg', messages=1)
|
||||
print(m)
|
||||
ax = plt.subplot(212)
|
||||
m.plot(ax=ax)
|
||||
plt.plot(X_full, Y_full)
|
||||
plt.ylim(-1.5, 1.5)
|
||||
plt.title('Student-t rasm corrupt')
|
||||
|
||||
return m
|
||||
|
||||
def boston_example():
|
||||
import sklearn
|
||||
from sklearn.cross_validation import KFold
|
||||
optimizer='bfgs'
|
||||
messages=0
|
||||
data = datasets.boston_housing()
|
||||
degrees_freedoms = [3, 5, 8, 10]
|
||||
X = data['X'].copy()
|
||||
Y = data['Y'].copy()
|
||||
X = X-X.mean(axis=0)
|
||||
X = X/X.std(axis=0)
|
||||
Y = Y-Y.mean()
|
||||
Y = Y/Y.std()
|
||||
num_folds = 10
|
||||
kf = KFold(len(Y), n_folds=num_folds, indices=True)
|
||||
num_models = len(degrees_freedoms) + 3 #3 for baseline, gaussian, gaussian laplace approx
|
||||
score_folds = np.zeros((num_models, num_folds))
|
||||
pred_density = score_folds.copy()
|
||||
|
||||
def rmse(Y, Ystar):
|
||||
return np.sqrt(np.mean((Y-Ystar)**2))
|
||||
|
||||
for n, (train, test) in enumerate(kf):
|
||||
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
|
||||
print "Fold {}".format(n)
|
||||
|
||||
noise = 1e-1 #np.exp(-2)
|
||||
rbf_len = 0.5
|
||||
data_axis_plot = 4
|
||||
plot = False
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
|
||||
#Baseline
|
||||
score_folds[0, n] = rmse(Y_test, np.mean(Y_train))
|
||||
|
||||
#Gaussian GP
|
||||
print "Gauss GP"
|
||||
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
|
||||
mgp.ensure_default_constraints()
|
||||
mgp.constrain_fixed('white', 1e-5)
|
||||
mgp['rbf_len'] = rbf_len
|
||||
mgp['noise'] = noise
|
||||
print mgp
|
||||
mgp.optimize(optimizer=optimizer, messages=messages)
|
||||
Y_test_pred = mgp.predict(X_test)
|
||||
score_folds[1, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test))
|
||||
print mgp
|
||||
print pred_density
|
||||
if plot:
|
||||
plt.figure()
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('GP gauss')
|
||||
|
||||
print "Gaussian Laplace GP"
|
||||
N, D = Y_train.shape
|
||||
g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
|
||||
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
|
||||
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
|
||||
mg.ensure_default_constraints()
|
||||
mg.constrain_positive('noise_variance')
|
||||
mg.constrain_fixed('white', 1e-5)
|
||||
mg['rbf_len'] = rbf_len
|
||||
mg['noise'] = noise
|
||||
print mg
|
||||
try:
|
||||
mg.optimize(optimizer=optimizer, messages=messages)
|
||||
except Exception:
|
||||
print "Blew up"
|
||||
Y_test_pred = mg.predict(X_test)
|
||||
score_folds[2, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test))
|
||||
print pred_density
|
||||
print mg
|
||||
if plot:
|
||||
plt.figure()
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('Lap gauss')
|
||||
|
||||
for stu_num, df in enumerate(degrees_freedoms):
|
||||
#Student T
|
||||
print "Student-T GP {}df".format(df)
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
|
||||
mstu_t.ensure_default_constraints()
|
||||
mstu_t.constrain_fixed('white', 1e-5)
|
||||
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
|
||||
mstu_t['rbf_len'] = rbf_len
|
||||
mstu_t['t_noise'] = noise
|
||||
print mstu_t
|
||||
try:
|
||||
mstu_t.optimize(optimizer=optimizer, messages=messages)
|
||||
except Exception:
|
||||
print "Blew up"
|
||||
Y_test_pred = mstu_t.predict(X_test)
|
||||
score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test))
|
||||
print pred_density
|
||||
print mstu_t
|
||||
if plot:
|
||||
plt.figure()
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('Stu t {}df'.format(df))
|
||||
|
||||
print "Average scores: {}".format(np.mean(score_folds, 1))
|
||||
print "Average pred density: {}".format(np.mean(pred_density, 1))
|
||||
|
||||
#Plotting
|
||||
stu_t_legends = ['Student T, df={}'.format(df) for df in degrees_freedoms]
|
||||
legends = ['Baseline', 'Gaussian', 'Laplace Approx Gaussian'] + stu_t_legends
|
||||
|
||||
#Plot boxplots for RMSE density
|
||||
fig = plt.figure()
|
||||
ax=fig.add_subplot(111)
|
||||
plt.title('RMSE')
|
||||
bp = ax.boxplot(score_folds.T, notch=0, sym='+', vert=1, whis=1.5)
|
||||
plt.setp(bp['boxes'], color='black')
|
||||
plt.setp(bp['whiskers'], color='black')
|
||||
plt.setp(bp['fliers'], color='red', marker='+')
|
||||
xtickNames = plt.setp(ax, xticklabels=legends)
|
||||
plt.setp(xtickNames, rotation=45, fontsize=8)
|
||||
ax.set_ylabel('RMSE')
|
||||
ax.set_xlabel('Distribution')
|
||||
#Make grid and put it below boxes
|
||||
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
|
||||
alpha=0.5)
|
||||
ax.set_axisbelow(True)
|
||||
|
||||
#Plot boxplots for predictive density
|
||||
fig = plt.figure()
|
||||
ax=fig.add_subplot(111)
|
||||
plt.title('Predictive density')
|
||||
bp = ax.boxplot(pred_density[1:,:].T, notch=0, sym='+', vert=1, whis=1.5)
|
||||
plt.setp(bp['boxes'], color='black')
|
||||
plt.setp(bp['whiskers'], color='black')
|
||||
plt.setp(bp['fliers'], color='red', marker='+')
|
||||
xtickNames = plt.setp(ax, xticklabels=legends[1:])
|
||||
plt.setp(xtickNames, rotation=45, fontsize=8)
|
||||
ax.set_ylabel('Mean Log probability P(Y*|Y)')
|
||||
ax.set_xlabel('Distribution')
|
||||
#Make grid and put it below boxes
|
||||
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
|
||||
alpha=0.5)
|
||||
ax.set_axisbelow(True)
|
||||
return mstu_t
|
||||
|
||||
def precipitation_example():
|
||||
import sklearn
|
||||
from sklearn.cross_validation import KFold
|
||||
data = datasets.boston_housing()
|
||||
X = data['X'].copy()
|
||||
Y = data['Y'].copy()
|
||||
X = X-X.mean(axis=0)
|
||||
X = X/X.std(axis=0)
|
||||
Y = Y-Y.mean()
|
||||
Y = Y/Y.std()
|
||||
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
||||
num_folds = 10
|
||||
kf = KFold(len(Y), n_folds=num_folds, indices=True)
|
||||
score_folds = np.zeros((4, num_folds))
|
||||
def rmse(Y, Ystar):
|
||||
return np.sqrt(np.mean((Y-Ystar)**2))
|
||||
#for train, test in kf:
|
||||
for n, (train, test) in enumerate(kf):
|
||||
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
|
||||
print "Fold {}".format(n)
|
||||
286
GPy/examples/non_gaussian.py
Normal file
286
GPy/examples/non_gaussian.py
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
import GPy
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from GPy.util import datasets
|
||||
|
||||
def student_t_approx(optimize=True, plot=True):
|
||||
"""
|
||||
Example of regressing with a student t likelihood using Laplace
|
||||
"""
|
||||
real_std = 0.1
|
||||
#Start a function, any function
|
||||
X = np.linspace(0.0, np.pi*2, 100)[:, None]
|
||||
Y = np.sin(X) + np.random.randn(*X.shape)*real_std
|
||||
Y = Y/Y.max()
|
||||
Yc = Y.copy()
|
||||
|
||||
X_full = np.linspace(0.0, np.pi*2, 500)[:, None]
|
||||
Y_full = np.sin(X_full)
|
||||
Y_full = Y_full/Y_full.max()
|
||||
|
||||
#Slightly noisy data
|
||||
Yc[75:80] += 1
|
||||
|
||||
#Very noisy data
|
||||
#Yc[10] += 100
|
||||
#Yc[25] += 10
|
||||
#Yc[23] += 10
|
||||
#Yc[26] += 1000
|
||||
#Yc[24] += 10
|
||||
#Yc = Yc/Yc.max()
|
||||
|
||||
#Add student t random noise to datapoints
|
||||
deg_free = 5
|
||||
print "Real noise: ", real_std
|
||||
initial_var_guess = 0.5
|
||||
edited_real_sd = initial_var_guess
|
||||
|
||||
# Kernel object
|
||||
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
kernel2 = kernel1.copy()
|
||||
kernel3 = kernel1.copy()
|
||||
kernel4 = kernel1.copy()
|
||||
|
||||
#Gaussian GP model on clean data
|
||||
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
|
||||
# optimize
|
||||
m1.ensure_default_constraints()
|
||||
m1.constrain_fixed('white', 1e-5)
|
||||
m1.randomize()
|
||||
|
||||
#Gaussian GP model on corrupt data
|
||||
m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
|
||||
m2.ensure_default_constraints()
|
||||
m2.constrain_fixed('white', 1e-5)
|
||||
m2.randomize()
|
||||
|
||||
#Student t GP model on clean data
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
|
||||
m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood)
|
||||
m3.ensure_default_constraints()
|
||||
m3.constrain_bounded('t_noise', 1e-6, 10.)
|
||||
m3.constrain_fixed('white', 1e-5)
|
||||
m3.randomize()
|
||||
|
||||
#Student t GP model on corrupt data
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
|
||||
m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
|
||||
m4.ensure_default_constraints()
|
||||
m4.constrain_bounded('t_noise', 1e-6, 10.)
|
||||
m4.constrain_fixed('white', 1e-5)
|
||||
m4.randomize()
|
||||
|
||||
if optimize:
|
||||
optimizer='scg'
|
||||
print "Clean Gaussian"
|
||||
m1.optimize(optimizer, messages=1)
|
||||
print "Corrupt Gaussian"
|
||||
m2.optimize(optimizer, messages=1)
|
||||
print "Clean student t"
|
||||
m3.optimize(optimizer, messages=1)
|
||||
print "Corrupt student t"
|
||||
m4.optimize(optimizer, messages=1)
|
||||
|
||||
if plot:
|
||||
plt.figure(1)
|
||||
plt.suptitle('Gaussian likelihood')
|
||||
ax = plt.subplot(211)
|
||||
m1.plot(ax=ax)
|
||||
plt.plot(X_full, Y_full)
|
||||
plt.ylim(-1.5, 1.5)
|
||||
plt.title('Gaussian clean')
|
||||
|
||||
ax = plt.subplot(212)
|
||||
m2.plot(ax=ax)
|
||||
plt.plot(X_full, Y_full)
|
||||
plt.ylim(-1.5, 1.5)
|
||||
plt.title('Gaussian corrupt')
|
||||
|
||||
plt.figure(2)
|
||||
plt.suptitle('Student-t likelihood')
|
||||
ax = plt.subplot(211)
|
||||
m3.plot(ax=ax)
|
||||
plt.plot(X_full, Y_full)
|
||||
plt.ylim(-1.5, 1.5)
|
||||
plt.title('Student-t rasm clean')
|
||||
|
||||
ax = plt.subplot(212)
|
||||
m4.plot(ax=ax)
|
||||
plt.plot(X_full, Y_full)
|
||||
plt.ylim(-1.5, 1.5)
|
||||
plt.title('Student-t rasm corrupt')
|
||||
|
||||
return m1, m2, m3, m4
|
||||
|
||||
def boston_example(optimize=True, plot=True):
|
||||
import sklearn
|
||||
from sklearn.cross_validation import KFold
|
||||
optimizer='bfgs'
|
||||
messages=0
|
||||
data = datasets.boston_housing()
|
||||
degrees_freedoms = [3, 5, 8, 10]
|
||||
X = data['X'].copy()
|
||||
Y = data['Y'].copy()
|
||||
X = X-X.mean(axis=0)
|
||||
X = X/X.std(axis=0)
|
||||
Y = Y-Y.mean()
|
||||
Y = Y/Y.std()
|
||||
num_folds = 10
|
||||
kf = KFold(len(Y), n_folds=num_folds, indices=True)
|
||||
num_models = len(degrees_freedoms) + 3 #3 for baseline, gaussian, gaussian laplace approx
|
||||
score_folds = np.zeros((num_models, num_folds))
|
||||
pred_density = score_folds.copy()
|
||||
|
||||
def rmse(Y, Ystar):
|
||||
return np.sqrt(np.mean((Y-Ystar)**2))
|
||||
|
||||
for n, (train, test) in enumerate(kf):
|
||||
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
|
||||
print "Fold {}".format(n)
|
||||
|
||||
noise = 1e-1 #np.exp(-2)
|
||||
rbf_len = 0.5
|
||||
data_axis_plot = 4
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
|
||||
#Baseline
|
||||
score_folds[0, n] = rmse(Y_test, np.mean(Y_train))
|
||||
|
||||
#Gaussian GP
|
||||
print "Gauss GP"
|
||||
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
|
||||
mgp.ensure_default_constraints()
|
||||
mgp.constrain_fixed('white', 1e-5)
|
||||
mgp['rbf_len'] = rbf_len
|
||||
mgp['noise'] = noise
|
||||
print mgp
|
||||
if optimize:
|
||||
mgp.optimize(optimizer=optimizer, messages=messages)
|
||||
Y_test_pred = mgp.predict(X_test)
|
||||
score_folds[1, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test))
|
||||
print mgp
|
||||
print pred_density
|
||||
|
||||
print "Gaussian Laplace GP"
|
||||
N, D = Y_train.shape
|
||||
g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
|
||||
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
|
||||
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
|
||||
mg.ensure_default_constraints()
|
||||
mg.constrain_positive('noise_variance')
|
||||
mg.constrain_fixed('white', 1e-5)
|
||||
mg['rbf_len'] = rbf_len
|
||||
mg['noise'] = noise
|
||||
print mg
|
||||
if optimize:
|
||||
mg.optimize(optimizer=optimizer, messages=messages)
|
||||
Y_test_pred = mg.predict(X_test)
|
||||
score_folds[2, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test))
|
||||
print pred_density
|
||||
print mg
|
||||
|
||||
for stu_num, df in enumerate(degrees_freedoms):
|
||||
#Student T
|
||||
print "Student-T GP {}df".format(df)
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
|
||||
mstu_t.ensure_default_constraints()
|
||||
mstu_t.constrain_fixed('white', 1e-5)
|
||||
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
|
||||
mstu_t['rbf_len'] = rbf_len
|
||||
mstu_t['t_noise'] = noise
|
||||
print mstu_t
|
||||
if optimize:
|
||||
mstu_t.optimize(optimizer=optimizer, messages=messages)
|
||||
Y_test_pred = mstu_t.predict(X_test)
|
||||
score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test))
|
||||
print pred_density
|
||||
print mstu_t
|
||||
|
||||
if plot:
|
||||
plt.figure()
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('GP gauss')
|
||||
|
||||
plt.figure()
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('Lap gauss')
|
||||
|
||||
plt.figure()
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test_pred[0])
|
||||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('Stu t {}df'.format(df))
|
||||
|
||||
print "Average scores: {}".format(np.mean(score_folds, 1))
|
||||
print "Average pred density: {}".format(np.mean(pred_density, 1))
|
||||
|
||||
if plot:
|
||||
#Plotting
|
||||
stu_t_legends = ['Student T, df={}'.format(df) for df in degrees_freedoms]
|
||||
legends = ['Baseline', 'Gaussian', 'Laplace Approx Gaussian'] + stu_t_legends
|
||||
|
||||
#Plot boxplots for RMSE density
|
||||
fig = plt.figure()
|
||||
ax=fig.add_subplot(111)
|
||||
plt.title('RMSE')
|
||||
bp = ax.boxplot(score_folds.T, notch=0, sym='+', vert=1, whis=1.5)
|
||||
plt.setp(bp['boxes'], color='black')
|
||||
plt.setp(bp['whiskers'], color='black')
|
||||
plt.setp(bp['fliers'], color='red', marker='+')
|
||||
xtickNames = plt.setp(ax, xticklabels=legends)
|
||||
plt.setp(xtickNames, rotation=45, fontsize=8)
|
||||
ax.set_ylabel('RMSE')
|
||||
ax.set_xlabel('Distribution')
|
||||
#Make grid and put it below boxes
|
||||
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
|
||||
alpha=0.5)
|
||||
ax.set_axisbelow(True)
|
||||
|
||||
#Plot boxplots for predictive density
|
||||
fig = plt.figure()
|
||||
ax=fig.add_subplot(111)
|
||||
plt.title('Predictive density')
|
||||
bp = ax.boxplot(pred_density[1:,:].T, notch=0, sym='+', vert=1, whis=1.5)
|
||||
plt.setp(bp['boxes'], color='black')
|
||||
plt.setp(bp['whiskers'], color='black')
|
||||
plt.setp(bp['fliers'], color='red', marker='+')
|
||||
xtickNames = plt.setp(ax, xticklabels=legends[1:])
|
||||
plt.setp(xtickNames, rotation=45, fontsize=8)
|
||||
ax.set_ylabel('Mean Log probability P(Y*|Y)')
|
||||
ax.set_xlabel('Distribution')
|
||||
#Make grid and put it below boxes
|
||||
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',
|
||||
alpha=0.5)
|
||||
ax.set_axisbelow(True)
|
||||
return mstu_t
|
||||
|
||||
#def precipitation_example():
|
||||
#import sklearn
|
||||
#from sklearn.cross_validation import KFold
|
||||
#data = datasets.boston_housing()
|
||||
#X = data['X'].copy()
|
||||
#Y = data['Y'].copy()
|
||||
#X = X-X.mean(axis=0)
|
||||
#X = X/X.std(axis=0)
|
||||
#Y = Y-Y.mean()
|
||||
#Y = Y/Y.std()
|
||||
#import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
||||
#num_folds = 10
|
||||
#kf = KFold(len(Y), n_folds=num_folds, indices=True)
|
||||
#score_folds = np.zeros((4, num_folds))
|
||||
#def rmse(Y, Ystar):
|
||||
#return np.sqrt(np.mean((Y-Ystar)**2))
|
||||
##for train, test in kf:
|
||||
#for n, (train, test) in enumerate(kf):
|
||||
#X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
|
||||
#print "Fold {}".format(n)
|
||||
|
||||
|
|
@ -101,9 +101,7 @@ def coregionalization_sparse(optimize=True, plot=True):
|
|||
|
||||
return m
|
||||
|
||||
|
||||
|
||||
def epomeo_gpx(optimize=True, plot=True):
|
||||
def epomeo_gpx(max_iters=200, optimize=True, plot=True):
|
||||
"""
|
||||
Perform Gaussian process regression on the latitude and longitude data
|
||||
from the Mount Epomeo runs. Requires gpxpy to be installed on your system
|
||||
|
|
@ -141,8 +139,7 @@ def epomeo_gpx(optimize=True, plot=True):
|
|||
|
||||
return m
|
||||
|
||||
|
||||
def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=10000, max_iters=300):
|
||||
def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=10000, max_iters=300, optimize=True, plot=True):
|
||||
"""
|
||||
Show an example of a multimodal error surface for Gaussian process
|
||||
regression. Gene 939 has bimodal behaviour where the noisy mode is
|
||||
|
|
@ -160,13 +157,14 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
|
|||
data['Y'] = data['Y'] - np.mean(data['Y'])
|
||||
|
||||
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
|
||||
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
|
||||
ax = pb.gca()
|
||||
pb.xlabel('length scale')
|
||||
pb.ylabel('log_10 SNR')
|
||||
if plot:
|
||||
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
|
||||
ax = pb.gca()
|
||||
pb.xlabel('length scale')
|
||||
pb.ylabel('log_10 SNR')
|
||||
|
||||
xlim = ax.get_xlim()
|
||||
ylim = ax.get_ylim()
|
||||
xlim = ax.get_xlim()
|
||||
ylim = ax.get_ylim()
|
||||
|
||||
# Now run a few optimizations
|
||||
models = []
|
||||
|
|
@ -183,16 +181,19 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
|
|||
optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
|
||||
|
||||
# optimize
|
||||
m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)
|
||||
if optimize:
|
||||
m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)
|
||||
|
||||
optim_point_x[1] = m['rbf_lengthscale']
|
||||
optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']);
|
||||
|
||||
pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
|
||||
if plot:
|
||||
pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
|
||||
models.append(m)
|
||||
|
||||
ax.set_xlim(xlim)
|
||||
ax.set_ylim(ylim)
|
||||
if plot:
|
||||
ax.set_xlim(xlim)
|
||||
ax.set_ylim(ylim)
|
||||
return m # (models, lls)
|
||||
|
||||
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
|
||||
|
|
@ -295,6 +296,7 @@ def toy_poisson_rbf_1d(optimize=True, plot=True):
|
|||
|
||||
def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
|
||||
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
|
||||
optimizer='scg'
|
||||
x_len = 30
|
||||
X = np.linspace(0, 10, x_len)[:, None]
|
||||
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
|
||||
|
|
@ -307,7 +309,7 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
|
|||
m = GPy.models.GPRegression(X, Y, likelihood=likelihood)
|
||||
|
||||
if optimize:
|
||||
m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
|
||||
m.optimize(optimizer)
|
||||
if plot:
|
||||
m.plot()
|
||||
# plot the real underlying rate function
|
||||
|
|
@ -315,9 +317,7 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
|
|||
|
||||
return m
|
||||
|
||||
|
||||
|
||||
def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
|
||||
def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True):
|
||||
# Create an artificial dataset where the values in the targets (Y)
|
||||
# only depend in dimensions 1 and 3 of the inputs (X). Run ARD to
|
||||
# see if this dependency can be recovered
|
||||
|
|
@ -347,13 +347,16 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
|
|||
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
|
||||
# m.set_prior('.*lengthscale',len_prior)
|
||||
|
||||
m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
|
||||
if optimize:
|
||||
m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
|
||||
|
||||
m.kern.plot_ARD()
|
||||
print(m)
|
||||
if plot:
|
||||
m.kern.plot_ARD()
|
||||
|
||||
print m
|
||||
return m
|
||||
|
||||
def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
|
||||
def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True):
|
||||
# Create an artificial dataset where the values in the targets (Y)
|
||||
# only depend in dimensions 1 and 3 of the inputs (X). Run ARD to
|
||||
# see if this dependency can be recovered
|
||||
|
|
@ -384,13 +387,16 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4):
|
|||
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
|
||||
# m.set_prior('.*lengthscale',len_prior)
|
||||
|
||||
m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
|
||||
if optimize:
|
||||
m.optimize(optimizer='scg', max_iters=max_iters, messages=1)
|
||||
|
||||
m.kern.plot_ARD()
|
||||
print(m)
|
||||
if plot:
|
||||
m.kern.plot_ARD()
|
||||
|
||||
print m
|
||||
return m
|
||||
|
||||
def robot_wireless(max_iters=100, kernel=None):
|
||||
def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
|
||||
"""Predict the location of a robot given wirelss signal strength readings."""
|
||||
data = GPy.util.datasets.robot_wireless()
|
||||
|
||||
|
|
@ -398,20 +404,24 @@ def robot_wireless(max_iters=100, kernel=None):
|
|||
m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel)
|
||||
|
||||
# optimize
|
||||
m.optimize(messages=True, max_iters=max_iters)
|
||||
if optimize:
|
||||
m.optimize(messages=True, max_iters=max_iters)
|
||||
|
||||
Xpredict = m.predict(data['Ytest'])[0]
|
||||
pb.plot(data['Xtest'][:, 0], data['Xtest'][:, 1], 'r-')
|
||||
pb.plot(Xpredict[:, 0], Xpredict[:, 1], 'b-')
|
||||
pb.axis('equal')
|
||||
pb.title('WiFi Localization with Gaussian Processes')
|
||||
pb.legend(('True Location', 'Predicted Location'))
|
||||
if plot:
|
||||
pb.plot(data['Xtest'][:, 0], data['Xtest'][:, 1], 'r-')
|
||||
pb.plot(Xpredict[:, 0], Xpredict[:, 1], 'b-')
|
||||
pb.axis('equal')
|
||||
pb.title('WiFi Localization with Gaussian Processes')
|
||||
pb.legend(('True Location', 'Predicted Location'))
|
||||
|
||||
sse = ((data['Xtest'] - Xpredict)**2).sum()
|
||||
print(m)
|
||||
|
||||
print m
|
||||
print('Sum of squares error on test data: ' + str(sse))
|
||||
return m
|
||||
|
||||
def silhouette(max_iters=100):
|
||||
def silhouette(max_iters=100, optimize=True, plot=True):
|
||||
"""Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
|
||||
data = GPy.util.datasets.silhouette()
|
||||
|
||||
|
|
@ -419,12 +429,13 @@ def silhouette(max_iters=100):
|
|||
m = GPy.models.GPRegression(data['X'], data['Y'])
|
||||
|
||||
# optimize
|
||||
m.optimize(messages=True, max_iters=max_iters)
|
||||
if optimize:
|
||||
m.optimize(messages=True, max_iters=max_iters)
|
||||
|
||||
print(m)
|
||||
print m
|
||||
return m
|
||||
|
||||
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
|
||||
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True):
|
||||
"""Run a 1D example of a sparse GP regression."""
|
||||
# sample inputs and outputs
|
||||
X = np.random.uniform(-3., 3., (num_samples, 1))
|
||||
|
|
@ -433,14 +444,17 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100):
|
|||
rbf = GPy.kern.rbf(1)
|
||||
# create simple GP Model
|
||||
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
|
||||
|
||||
|
||||
m.checkgrad(verbose=1)
|
||||
m.optimize('tnc', messages=1, max_iters=max_iters)
|
||||
m.plot()
|
||||
|
||||
if optimize:
|
||||
m.optimize('tnc', messages=1, max_iters=max_iters)
|
||||
|
||||
if plot:
|
||||
m.plot()
|
||||
|
||||
return m
|
||||
|
||||
def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100):
|
||||
def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, optimize=True, plot=True):
|
||||
"""Run a 2D example of a sparse GP regression."""
|
||||
X = np.random.uniform(-3., 3., (num_samples, 2))
|
||||
Y = np.sin(X[:, 0:1]) * np.sin(X[:, 1:2]) + np.random.randn(num_samples, 1) * 0.05
|
||||
|
|
@ -456,13 +470,18 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100):
|
|||
|
||||
m.checkgrad()
|
||||
|
||||
# optimize and plot
|
||||
m.optimize('tnc', messages=1, max_iters=max_iters)
|
||||
m.plot()
|
||||
print(m)
|
||||
# optimize
|
||||
if optimize:
|
||||
m.optimize('tnc', messages=1, max_iters=max_iters)
|
||||
|
||||
# plot
|
||||
if plot:
|
||||
m.plot()
|
||||
|
||||
print m
|
||||
return m
|
||||
|
||||
def uncertain_inputs_sparse_regression(optimize=True, plot=True):
|
||||
def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
|
||||
"""Run a 1D example of a sparse GP regression with uncertain inputs."""
|
||||
fig, axes = pb.subplots(1, 2, figsize=(12, 5))
|
||||
|
||||
|
|
@ -477,18 +496,23 @@ def uncertain_inputs_sparse_regression(optimize=True, plot=True):
|
|||
|
||||
# create simple GP Model - no input uncertainty on this one
|
||||
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
|
||||
m.optimize('scg', messages=1, max_iters=max_iters)
|
||||
m.plot(ax=axes[0])
|
||||
axes[0].set_title('no input uncertainty')
|
||||
|
||||
if optimize:
|
||||
m.optimize('scg', messages=1, max_iters=max_iters)
|
||||
|
||||
if plot:
|
||||
m.plot(ax=axes[0])
|
||||
axes[0].set_title('no input uncertainty')
|
||||
print m
|
||||
|
||||
# the same Model with uncertainty
|
||||
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
|
||||
m.optimize('scg', messages=1, max_iters=max_iters)
|
||||
m.plot(ax=axes[1])
|
||||
axes[1].set_title('with input uncertainty')
|
||||
print(m)
|
||||
|
||||
fig.canvas.draw()
|
||||
if optimize:
|
||||
m.optimize('scg', messages=1, max_iters=max_iters)
|
||||
if plot:
|
||||
m.plot(ax=axes[1])
|
||||
axes[1].set_title('with input uncertainty')
|
||||
fig.canvas.draw()
|
||||
|
||||
print m
|
||||
return m
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import pylab as pb
|
|||
import numpy as np
|
||||
import GPy
|
||||
|
||||
def toy_1d():
|
||||
def toy_1d(optimize=True, plot=True):
|
||||
N = 2000
|
||||
M = 20
|
||||
|
||||
|
|
@ -20,22 +20,18 @@ def toy_1d():
|
|||
|
||||
m.param_steplength = 1e-4
|
||||
|
||||
fig = pb.figure()
|
||||
ax = fig.add_subplot(111)
|
||||
def cb():
|
||||
ax.cla()
|
||||
m.plot(ax=ax,Z_height=-3)
|
||||
ax.set_ylim(-3,3)
|
||||
fig.canvas.draw()
|
||||
if plot:
|
||||
fig = pb.figure()
|
||||
ax = fig.add_subplot(111)
|
||||
def cb(foo):
|
||||
ax.cla()
|
||||
m.plot(ax=ax,Z_height=-3)
|
||||
ax.set_ylim(-3,3)
|
||||
fig.canvas.draw()
|
||||
|
||||
m.optimize(500, callback=cb, callback_interval=1)
|
||||
if optimize:
|
||||
m.optimize(500, callback=cb, callback_interval=1)
|
||||
|
||||
m.plot_traces()
|
||||
if plot:
|
||||
m.plot_traces()
|
||||
return m
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ pb.ion()
|
|||
import numpy as np
|
||||
import GPy
|
||||
|
||||
def tuto_GP_regression():
|
||||
def tuto_GP_regression(optimize=True, plot=True):
|
||||
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
|
||||
|
||||
X = np.random.uniform(-3.,3.,(20,1))
|
||||
|
|
@ -22,7 +22,8 @@ def tuto_GP_regression():
|
|||
m = GPy.models.GPRegression(X, Y, kernel)
|
||||
|
||||
print m
|
||||
m.plot()
|
||||
if plot:
|
||||
m.plot()
|
||||
|
||||
m.constrain_positive('')
|
||||
|
||||
|
|
@ -31,9 +32,9 @@ def tuto_GP_regression():
|
|||
m.constrain_bounded('.*lengthscale',1.,10. )
|
||||
m.constrain_fixed('.*noise',0.0025)
|
||||
|
||||
m.optimize()
|
||||
|
||||
m.optimize_restarts(num_restarts = 10)
|
||||
if optimize:
|
||||
m.optimize()
|
||||
m.optimize_restarts(num_restarts = 10)
|
||||
|
||||
#######################################################
|
||||
#######################################################
|
||||
|
|
@ -51,12 +52,15 @@ def tuto_GP_regression():
|
|||
m.constrain_positive('')
|
||||
|
||||
# optimize and plot
|
||||
m.optimize('tnc', max_f_eval = 1000)
|
||||
m.plot()
|
||||
print(m)
|
||||
if optimize:
|
||||
m.optimize('tnc', max_f_eval = 1000)
|
||||
if plot:
|
||||
m.plot()
|
||||
|
||||
print m
|
||||
return(m)
|
||||
|
||||
def tuto_kernel_overview():
|
||||
def tuto_kernel_overview(optimize=True, plot=True):
|
||||
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
|
||||
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
|
||||
ker2 = GPy.kern.rbf(input_dim=1, variance = .75, lengthscale=2.)
|
||||
|
|
@ -64,9 +68,10 @@ def tuto_kernel_overview():
|
|||
|
||||
print ker2
|
||||
|
||||
ker1.plot()
|
||||
ker2.plot()
|
||||
ker3.plot()
|
||||
if plot:
|
||||
ker1.plot()
|
||||
ker2.plot()
|
||||
ker3.plot()
|
||||
|
||||
k1 = GPy.kern.rbf(1,1.,2.)
|
||||
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
|
||||
|
|
@ -114,30 +119,32 @@ def tuto_kernel_overview():
|
|||
|
||||
# Create GP regression model
|
||||
m = GPy.models.GPRegression(X, Y, Kanova)
|
||||
fig = pb.figure(figsize=(5,5))
|
||||
ax = fig.add_subplot(111)
|
||||
m.plot(ax=ax)
|
||||
|
||||
pb.figure(figsize=(20,3))
|
||||
pb.subplots_adjust(wspace=0.5)
|
||||
axs = pb.subplot(1,5,1)
|
||||
m.plot(ax=axs)
|
||||
pb.subplot(1,5,2)
|
||||
pb.ylabel("= ",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,3)
|
||||
m.plot(ax=axs, which_parts=[False,True,False,False])
|
||||
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,4)
|
||||
m.plot(ax=axs, which_parts=[False,False,True,False])
|
||||
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,5)
|
||||
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
|
||||
m.plot(ax=axs, which_parts=[False,False,False,True])
|
||||
if plot:
|
||||
fig = pb.figure(figsize=(5,5))
|
||||
ax = fig.add_subplot(111)
|
||||
m.plot(ax=ax)
|
||||
|
||||
pb.figure(figsize=(20,3))
|
||||
pb.subplots_adjust(wspace=0.5)
|
||||
axs = pb.subplot(1,5,1)
|
||||
m.plot(ax=axs)
|
||||
pb.subplot(1,5,2)
|
||||
pb.ylabel("= ",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,3)
|
||||
m.plot(ax=axs, which_parts=[False,True,False,False])
|
||||
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,4)
|
||||
m.plot(ax=axs, which_parts=[False,False,True,False])
|
||||
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,5)
|
||||
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
|
||||
m.plot(ax=axs, which_parts=[False,False,False,True])
|
||||
|
||||
return(m)
|
||||
|
||||
|
||||
def model_interaction():
|
||||
def model_interaction(optimize=True, plot=True):
|
||||
X = np.random.randn(20,1)
|
||||
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
|
||||
k = GPy.kern.rbf(1) + GPy.kern.bias(1)
|
||||
|
|
|
|||
|
|
@ -138,6 +138,10 @@ class ODE_1(Kernpart):
|
|||
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
|
||||
dkdvar = k1+k2+k3
|
||||
|
||||
#target[0] dk dvarU
|
||||
#target[1] dk dvarY
|
||||
#target[2] dk d theta1
|
||||
#target[3] dk d theta2
|
||||
target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
|
||||
target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
|
||||
target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
|
||||
|
|
|
|||
|
|
@ -95,6 +95,8 @@ class ODE_UY(Kernpart):
|
|||
|
||||
def K(self, X, X2, target):
|
||||
"""Compute the covariance matrix between X and X2."""
|
||||
# model : a * dy/dt + b * y = U
|
||||
#lu=sqrt(3)/theta1 ly=1/theta2 theta2= a/b :thetay sigma2=1/(2ab) :sigmay
|
||||
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
|
|
@ -112,20 +114,28 @@ class ODE_UY(Kernpart):
|
|||
Vu=self.varianceU
|
||||
Vy=self.varianceY
|
||||
|
||||
# kernel for kuu matern3/2
|
||||
kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
|
||||
|
||||
# kernel for kyy
|
||||
k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
|
||||
k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
|
||||
k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
|
||||
kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))
|
||||
|
||||
|
||||
# cross covariance function
|
||||
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
|
||||
|
||||
# cross covariance kyu
|
||||
kyup = lambda dist:Vu*Vy*(k1(dist)+k2(dist)) #t>0 kyu
|
||||
kyun = lambda dist:Vu*Vy*(kyu3(dist)) #t<0 kyu
|
||||
|
||||
# cross covariance kuy
|
||||
kuyp = lambda dist:Vu*Vy*(kyu3(dist)) #t>0 kuy
|
||||
kuyn = lambda dist:Vu*Vy*(k1(dist)+k2(dist)) #t<0 kuy
|
||||
|
||||
|
||||
for i, s1 in enumerate(slices):
|
||||
for j, s2 in enumerate(slices2):
|
||||
for ss1 in s1:
|
||||
|
|
@ -133,12 +143,13 @@ class ODE_UY(Kernpart):
|
|||
if i==0 and j==0:
|
||||
target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
|
||||
elif i==0 and j==1:
|
||||
target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
|
||||
#target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
|
||||
target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
|
||||
elif i==1 and j==1:
|
||||
target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
|
||||
else:
|
||||
target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
|
||||
|
||||
#target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
|
||||
target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
|
||||
|
||||
#KUU = kuu(np.abs(rdist[:iu,:iu]))
|
||||
|
||||
|
|
@ -184,13 +195,30 @@ class ODE_UY(Kernpart):
|
|||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.abs(X - X2.T)
|
||||
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
X2,slices2 = X,slices
|
||||
else:
|
||||
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
|
||||
rdist = X - X2.T
|
||||
ly=1/self.lengthscaleY
|
||||
lu=np.sqrt(3)/self.lengthscaleU
|
||||
#ly=self.lengthscaleY
|
||||
#lu=self.lengthscaleU
|
||||
|
||||
rd=rdist.shape[0]
|
||||
dktheta1 = np.zeros([rd,rd])
|
||||
dktheta2 = np.zeros([rd,rd])
|
||||
dkUdvar = np.zeros([rd,rd])
|
||||
dkYdvar = np.zeros([rd,rd])
|
||||
|
||||
# dk dtheta for UU
|
||||
UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist)
|
||||
UUdtheta2 = lambda dist: 0
|
||||
#UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist)
|
||||
UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
|
||||
|
||||
# dk dtheta for YY
|
||||
|
||||
dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
|
||||
#c=np.sqrt(3)
|
||||
|
|
@ -207,7 +235,7 @@ class ODE_UY(Kernpart):
|
|||
|
||||
dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
|
||||
|
||||
dktheta1 = lambda dist: self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
|
||||
#dktheta1 = lambda dist: self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
|
||||
|
||||
|
||||
|
||||
|
|
@ -221,17 +249,72 @@ class ODE_UY(Kernpart):
|
|||
|
||||
dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
|
||||
|
||||
dktheta2 = lambda dist: self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
|
||||
|
||||
|
||||
#dktheta2 = lambda dist: self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
|
||||
|
||||
# kyy kernel
|
||||
#k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
|
||||
#k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
|
||||
#k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
|
||||
k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
|
||||
k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
|
||||
k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
|
||||
dkdvar = k1+k2+k3
|
||||
#dkdvar = k1+k2+k3
|
||||
|
||||
target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
|
||||
target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
|
||||
#cross covariance kernel
|
||||
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
|
||||
|
||||
# dk dtheta for UY
|
||||
dkcrtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) )
|
||||
dkcrtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) - (lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )
|
||||
#dkuyp dtheta
|
||||
#dkuyp dtheta1 = self.varianceU*self.varianceY* (dk1theta1() + dk2theta1())
|
||||
#dkuyp dtheta2 = self.varianceU*self.varianceY* (dk1theta2() + dk2theta2())
|
||||
#dkuyp dVar = k1() + k2()
|
||||
|
||||
|
||||
#dkyup dtheta
|
||||
#dkyun dtheta1 = self.varianceU*self.varianceY* (dk1theta1() + dk2theta1())
|
||||
#dkyun dtheta2 = self.varianceU*self.varianceY* (dk1theta2() + dk2theta2())
|
||||
#dkyup dVar = k1() + k2() #
|
||||
|
||||
|
||||
|
||||
|
||||
for i, s1 in enumerate(slices):
|
||||
for j, s2 in enumerate(slices2):
|
||||
for ss1 in s1:
|
||||
for ss2 in s2:
|
||||
if i==0 and j==0:
|
||||
#target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
|
||||
dktheta1[ss1,ss2] = self.varianceU*self.varianceY*UUdtheta1(np.abs(rdist[ss1,ss2]))
|
||||
dktheta2[ss1,ss2] = 0
|
||||
dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
|
||||
dkYdvar[ss1,ss2] = 0
|
||||
elif i==0 and j==1:
|
||||
#target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
|
||||
#dktheta1[ss1,ss2] =
|
||||
#dktheta2[ss1,ss2] =
|
||||
#dkdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
|
||||
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) )
|
||||
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) )
|
||||
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyu3(np.abs(rdist[ss1,ss2])) ,k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])) )
|
||||
dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2]
|
||||
elif i==1 and j==1:
|
||||
#target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
|
||||
dktheta1[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
|
||||
dktheta2[ss1,ss2] = self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2])))
|
||||
dkUdvar[ss1,ss2] = (k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
|
||||
dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2]
|
||||
else:
|
||||
#target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
|
||||
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.varianceU*self.varianceY*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , dkcrtheta1(np.abs(rdist[ss1,ss2])) )
|
||||
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.varianceU*self.varianceY*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , dkcrtheta2(np.abs(rdist[ss1,ss2])) )
|
||||
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2])), kyu3(np.abs(rdist[ss1,ss2])) )
|
||||
dkYdvar[ss1,ss2] = dkUdvar[ss1,ss2]
|
||||
|
||||
|
||||
target[0] += np.sum(self.varianceY*dkUdvar * dL_dK)
|
||||
target[1] += np.sum(self.varianceU*dkYdvar * dL_dK)
|
||||
target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
|
||||
target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import scipy as sp
|
|||
from likelihood import likelihood
|
||||
from ..util.linalg import mdot, jitchol, pddet, dpotrs
|
||||
from functools import partial as partial_func
|
||||
import warnings
|
||||
|
||||
class Laplace(likelihood):
|
||||
"""Laplace approximation to a posterior"""
|
||||
|
|
@ -64,6 +65,7 @@ class Laplace(likelihood):
|
|||
self.YYT = None
|
||||
|
||||
self.old_Ki_f = None
|
||||
self.bad_fhat = False
|
||||
|
||||
def predictive_values(self,mu,var,full_cov,**noise_args):
|
||||
if full_cov:
|
||||
|
|
@ -198,17 +200,17 @@ class Laplace(likelihood):
|
|||
Y_tilde = Wi*self.Ki_f + self.f_hat
|
||||
|
||||
self.Wi_K_i = self.W12BiW12
|
||||
self.ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
|
||||
self.lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
|
||||
self.y_Wi_Ki_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
|
||||
ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
|
||||
lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
|
||||
y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
|
||||
|
||||
Z_tilde = (+ self.lik
|
||||
Z_tilde = (+ lik
|
||||
- 0.5*self.ln_B_det
|
||||
+ 0.5*self.ln_det_Wi_K
|
||||
+ 0.5*ln_det_Wi_K
|
||||
- 0.5*self.f_Ki_f
|
||||
+ 0.5*self.y_Wi_Ki_i_y
|
||||
+ 0.5*y_Wi_K_i_y
|
||||
+ self.NORMAL_CONST
|
||||
)
|
||||
#print "Term, {}, {}, {}, {}, {}".format(self.lik, - 0.5*self.ln_B_det, + 0.5*self.ln_det_Wi_K, - 0.5*self.f_Ki_f, + 0.5*self.y_Wi_Ki_i_y)
|
||||
|
||||
#Convert to float as its (1, 1) and Z must be a scalar
|
||||
self.Z = np.float64(Z_tilde)
|
||||
|
|
@ -247,7 +249,10 @@ class Laplace(likelihood):
|
|||
#At this point get the hessian matrix (or vector as W is diagonal)
|
||||
self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
|
||||
|
||||
#TODO: Could save on computation when using rasm by returning these, means it isn't just a "mode finder" though
|
||||
if not self.noise_model.log_concave:
|
||||
#print "Under 1e-10: {}".format(np.sum(self.W < 1e-6))
|
||||
self.W[self.W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
||||
|
||||
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
|
||||
|
||||
self.Ki_f = self.Ki_f
|
||||
|
|
@ -268,7 +273,7 @@ class Laplace(likelihood):
|
|||
:returns: (W12BiW12, ln_B_det)
|
||||
"""
|
||||
if not self.noise_model.log_concave:
|
||||
#print "Under 1e-10: {}".format(np.sum(W < 1e-10))
|
||||
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
|
||||
W[W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
||||
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
|
||||
# To cause the posterior to become less certain than the prior and likelihood,
|
||||
|
|
@ -278,16 +283,13 @@ class Laplace(likelihood):
|
|||
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
|
||||
W_12 = np.sqrt(W)
|
||||
B = np.eye(self.N) + W_12*K*W_12.T
|
||||
try:
|
||||
L = jitchol(B)
|
||||
except:
|
||||
import ipdb; ipdb.set_trace()
|
||||
L = jitchol(B)
|
||||
|
||||
W12BiW12 = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
|
||||
W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
|
||||
ln_B_det = 2*np.sum(np.log(np.diag(L)))
|
||||
return W12BiW12, ln_B_det
|
||||
return W12BiW12a, ln_B_det
|
||||
|
||||
def rasm_mode(self, K, MAX_ITER=30):
|
||||
def rasm_mode(self, K, MAX_ITER=40):
|
||||
"""
|
||||
Rasmussen's numerically stable mode finding
|
||||
For nomenclature see Rasmussen & Williams 2006
|
||||
|
|
@ -302,9 +304,10 @@ class Laplace(likelihood):
|
|||
"""
|
||||
#old_Ki_f = np.zeros((self.N, 1))
|
||||
|
||||
#Start f's at zero originally
|
||||
if self.old_Ki_f is None:
|
||||
old_Ki_f = np.zeros((self.N, 1))
|
||||
#Start f's at zero originally of if we have gone off track, try restarting
|
||||
if self.old_Ki_f is None or self.bad_fhat:
|
||||
old_Ki_f = np.random.rand(self.N, 1)/50.0
|
||||
#old_Ki_f = self.Y
|
||||
f = np.dot(K, old_Ki_f)
|
||||
else:
|
||||
#Start at the old best point
|
||||
|
|
@ -318,7 +321,7 @@ class Laplace(likelihood):
|
|||
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
|
||||
|
||||
difference = np.inf
|
||||
epsilon = 1e-5
|
||||
epsilon = 1e-7
|
||||
#step_size = 1
|
||||
#rs = 0
|
||||
i = 0
|
||||
|
|
@ -381,14 +384,20 @@ class Laplace(likelihood):
|
|||
|
||||
#difference = abs(new_obj - old_obj)
|
||||
#old_obj = new_obj.copy()
|
||||
difference = np.abs(np.sum(f - f_old))
|
||||
#difference = np.abs(np.sum(Ki_f - old_Ki_f))
|
||||
difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
|
||||
#difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
|
||||
old_Ki_f = Ki_f.copy()
|
||||
i += 1
|
||||
|
||||
self.old_Ki_f = old_Ki_f.copy()
|
||||
|
||||
#Warn of bad fits
|
||||
if difference > epsilon:
|
||||
print "Not perfect f_hat fit difference: {}".format(difference)
|
||||
self.bad_fhat = True
|
||||
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
|
||||
elif self.bad_fhat:
|
||||
self.bad_fhat = False
|
||||
warnings.warn("f_hat now perfect again")
|
||||
|
||||
self.Ki_f = Ki_f
|
||||
return f
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import os
|
|||
import random
|
||||
from nose.tools import nottest
|
||||
import sys
|
||||
import itertools
|
||||
|
||||
class ExamplesTests(unittest.TestCase):
|
||||
def _checkgrad(self, Model):
|
||||
|
|
@ -39,8 +40,19 @@ def model_instance(model):
|
|||
#assert isinstance(model, GPy.core.model)
|
||||
return isinstance(model, GPy.core.model.Model)
|
||||
|
||||
@nottest
|
||||
def flatten_nested(lst):
|
||||
result = []
|
||||
for element in lst:
|
||||
if hasattr(element, '__iter__'):
|
||||
result.extend(flatten_nested(element))
|
||||
else:
|
||||
result.append(element)
|
||||
return result
|
||||
|
||||
#@nottest
|
||||
def test_models():
|
||||
optimize=False
|
||||
plot=True
|
||||
examples_path = os.path.dirname(GPy.examples.__file__)
|
||||
# Load modules
|
||||
failing_models = {}
|
||||
|
|
@ -54,29 +66,34 @@ def test_models():
|
|||
print "After"
|
||||
print functions
|
||||
for example in functions:
|
||||
if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100', 'brendan_faces']:
|
||||
print "SKIPPING"
|
||||
continue
|
||||
#if example[0] in ['oil', 'silhouette', 'GPLVM_oil_100', 'brendan_faces']:
|
||||
#print "SKIPPING"
|
||||
#continue
|
||||
|
||||
print "Testing example: ", example[0]
|
||||
# Generate model
|
||||
|
||||
try:
|
||||
model = example[1]()
|
||||
models = [ example[1](optimize=optimize, plot=plot) ]
|
||||
#If more than one model returned, flatten them
|
||||
models = flatten_nested(models)
|
||||
except Exception as e:
|
||||
failing_models[example[0]] = "Cannot make model: \n{e}".format(e=e)
|
||||
else:
|
||||
print model
|
||||
print models
|
||||
model_checkgrads.description = 'test_checkgrads_%s' % example[0]
|
||||
try:
|
||||
if not model_checkgrads(model):
|
||||
failing_models[model_checkgrads.description] = False
|
||||
for model in models:
|
||||
if not model_checkgrads(model):
|
||||
failing_models[model_checkgrads.description] = False
|
||||
except Exception as e:
|
||||
failing_models[model_checkgrads.description] = e
|
||||
|
||||
model_instance.description = 'test_instance_%s' % example[0]
|
||||
try:
|
||||
if not model_instance(model):
|
||||
failing_models[model_instance.description] = False
|
||||
for model in models:
|
||||
if not model_instance(model):
|
||||
failing_models[model_instance.description] = False
|
||||
except Exception as e:
|
||||
failing_models[model_instance.description] = e
|
||||
|
||||
|
|
|
|||
|
|
@ -593,6 +593,95 @@ class LaplaceTests(unittest.TestCase):
|
|||
grad.checkgrad(verbose=1)
|
||||
self.assertTrue(grad.checkgrad())
|
||||
|
||||
#@unittest.skip('Not working yet, needs to be checked')
|
||||
def test_laplace_log_likelihood(self):
|
||||
debug = False
|
||||
real_std = 0.1
|
||||
initial_var_guess = 0.5
|
||||
|
||||
#Start a function, any function
|
||||
X = np.linspace(0.0, np.pi*2, 100)[:, None]
|
||||
Y = np.sin(X) + np.random.randn(*X.shape)*real_std
|
||||
Y = Y/Y.max()
|
||||
#Yc = Y.copy()
|
||||
#Yc[75:80] += 1
|
||||
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
kernel2 = kernel1.copy()
|
||||
|
||||
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
|
||||
m1.constrain_fixed('white', 1e-6)
|
||||
m1['noise'] = initial_var_guess
|
||||
m1.constrain_bounded('noise', 1e-4, 10)
|
||||
m1.constrain_bounded('rbf', 1e-4, 10)
|
||||
m1.ensure_default_constraints()
|
||||
m1.randomize()
|
||||
|
||||
gauss_distr = GPy.likelihoods.gaussian(variance=initial_var_guess, D=1, N=Y.shape[0])
|
||||
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), gauss_distr)
|
||||
m2 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel2, likelihood=laplace_likelihood)
|
||||
m2.ensure_default_constraints()
|
||||
m2.constrain_fixed('white', 1e-6)
|
||||
m2.constrain_bounded('rbf', 1e-4, 10)
|
||||
m2.constrain_bounded('noise', 1e-4, 10)
|
||||
m2.randomize()
|
||||
|
||||
if debug:
|
||||
print m1
|
||||
print m2
|
||||
optimizer = 'scg'
|
||||
print "Gaussian"
|
||||
m1.optimize(optimizer, messages=debug)
|
||||
print "Laplace Gaussian"
|
||||
m2.optimize(optimizer, messages=debug)
|
||||
if debug:
|
||||
print m1
|
||||
print m2
|
||||
|
||||
m2._set_params(m1._get_params())
|
||||
|
||||
#Predict for training points to get posterior mean and variance
|
||||
post_mean, post_var, _, _ = m1.predict(X)
|
||||
post_mean_approx, post_var_approx, _, _ = m2.predict(X)
|
||||
|
||||
if debug:
|
||||
import pylab as pb
|
||||
pb.figure(5)
|
||||
pb.title('posterior means')
|
||||
pb.scatter(X, post_mean, c='g')
|
||||
pb.scatter(X, post_mean_approx, c='r', marker='x')
|
||||
|
||||
pb.figure(6)
|
||||
pb.title('plot_f')
|
||||
m1.plot_f(fignum=6)
|
||||
m2.plot_f(fignum=6)
|
||||
fig, axes = pb.subplots(2, 1)
|
||||
fig.suptitle('Covariance matricies')
|
||||
a1 = pb.subplot(121)
|
||||
a1.matshow(m1.likelihood.covariance_matrix)
|
||||
a2 = pb.subplot(122)
|
||||
a2.matshow(m2.likelihood.covariance_matrix)
|
||||
|
||||
pb.figure(8)
|
||||
pb.scatter(X, m1.likelihood.Y, c='g')
|
||||
pb.scatter(X, m2.likelihood.Y, c='r', marker='x')
|
||||
|
||||
|
||||
|
||||
#Check Y's are the same
|
||||
np.testing.assert_almost_equal(Y, m2.likelihood.Y, decimal=5)
|
||||
#Check marginals are the same
|
||||
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
|
||||
#Check marginals are the same with random
|
||||
m1.randomize()
|
||||
m2._set_params(m1._get_params())
|
||||
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
|
||||
|
||||
#Check they are checkgradding
|
||||
#m1.checkgrad(verbose=1)
|
||||
#m2.checkgrad(verbose=1)
|
||||
self.assertTrue(m1.checkgrad())
|
||||
self.assertTrue(m2.checkgrad())
|
||||
|
||||
if __name__ == "__main__":
|
||||
print "Running unit tests"
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@
|
|||
"citation":"Please include this in your acknowledgements: The data used in this project was obtained from mocap.cs.cmu.edu.\nThe database was created with funding from NSF EIA-0196217.",
|
||||
"details":"CMU Motion Capture data base. Captured by a Vicon motion capture system consisting of 12 infrared MX-40 cameras, each of which is capable of recording at 120 Hz with images of 4 megapixel resolution. Motions are captured in a working volume of approximately 3m x 8m. The capture subject wears 41 markers and a stylish black garment.",
|
||||
"urls":[
|
||||
"http://mocap.cs.cmu.edu"
|
||||
"http://mocap.cs.cmu.edu/subjects"
|
||||
],
|
||||
"size":null
|
||||
},
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import numpy as np
|
|||
import GPy
|
||||
import scipy.io
|
||||
import cPickle as pickle
|
||||
import urllib as url
|
||||
import zipfile
|
||||
import tarfile
|
||||
import datetime
|
||||
|
|
@ -15,7 +14,7 @@ except ImportError:
|
|||
ipython_available=False
|
||||
|
||||
|
||||
import sys, urllib
|
||||
import sys, urllib2
|
||||
|
||||
def reporthook(a,b,c):
|
||||
# ',' at the end of the line is important!
|
||||
|
|
@ -82,7 +81,21 @@ def download_url(url, store_directory, save_name = None, messages = True, suffix
|
|||
print "Downloading ", url, "->", os.path.join(store_directory, file)
|
||||
if not os.path.exists(dir_name):
|
||||
os.makedirs(dir_name)
|
||||
urllib.urlretrieve(url+suffix, save_name, reporthook)
|
||||
try:
|
||||
response = urllib2.urlopen(url+suffix)
|
||||
except urllib2.URLError, e:
|
||||
if not hasattr(e, "code"):
|
||||
raise
|
||||
response = e
|
||||
if response.code > 399 and response.code<500:
|
||||
raise ValueError('Tried url ' + url + suffix + ' and received client error ' + str(response.code))
|
||||
elif response.code > 499:
|
||||
raise ValueError('Tried url ' + url + suffix + ' and received server error ' + str(response.code))
|
||||
# if we wanted to get more sophisticated maybe we should check the response code here again even for successes.
|
||||
with open(save_name, 'wb') as f:
|
||||
f.write(response.read())
|
||||
|
||||
#urllib.urlretrieve(url+suffix, save_name, reporthook)
|
||||
|
||||
def authorize_download(dataset_name=None):
|
||||
"""Check with the user that the are happy with terms and conditions for the data set."""
|
||||
|
|
@ -142,6 +155,8 @@ def cmu_urls_files(subj_motions, messages = True):
|
|||
'''
|
||||
Find which resources are missing on the local disk for the requested CMU motion capture motions.
|
||||
'''
|
||||
dr = data_resources['cmu_mocap_full']
|
||||
cmu_url = dr['urls'][0]
|
||||
|
||||
subjects_num = subj_motions[0]
|
||||
motions_num = subj_motions[1]
|
||||
|
|
@ -187,7 +202,7 @@ def cmu_urls_files(subj_motions, messages = True):
|
|||
url_required = True
|
||||
file_download.append(subjects[i] + '_' + motions[i][j] + '.amc')
|
||||
if url_required:
|
||||
resource['urls'].append(cmu_url + subjects[i] + '/')
|
||||
resource['urls'].append(cmu_url + '/' + subjects[i] + '/')
|
||||
resource['files'].append(file_download)
|
||||
return resource
|
||||
|
||||
|
|
@ -435,7 +450,7 @@ def simulation_BGPLVM():
|
|||
Y = np.array(mat_data['Y'], dtype=float)
|
||||
S = np.array(mat_data['initS'], dtype=float)
|
||||
mu = np.array(mat_data['initMu'], dtype=float)
|
||||
return data_details_return({'S': S, 'Y': Y, 'mu': mu}, data_set)
|
||||
#return data_details_return({'S': S, 'Y': Y, 'mu': mu}, data_set)
|
||||
return {'Y': Y, 'S': S,
|
||||
'mu' : mu,
|
||||
'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"}
|
||||
|
|
@ -693,15 +708,15 @@ def creep_data(data_set='creep_rupture'):
|
|||
X = all_data[:, features].copy()
|
||||
return data_details_return({'X': X, 'y': y}, data_set)
|
||||
|
||||
def cmu_mocap_49_balance():
|
||||
def cmu_mocap_49_balance(data_set='cmu_mocap'):
|
||||
"""Load CMU subject 49's one legged balancing motion that was used by Alvarez, Luengo and Lawrence at AISTATS 2009."""
|
||||
train_motions = ['18', '19']
|
||||
test_motions = ['20']
|
||||
data = cmu_mocap('49', train_motions, test_motions, sample_every=4)
|
||||
data = cmu_mocap('49', train_motions, test_motions, sample_every=4, data_set=data_set)
|
||||
data['info'] = "One legged balancing motions from CMU data base subject 49. As used in Alvarez, Luengo and Lawrence at AISTATS 2009. It consists of " + data['info']
|
||||
return data
|
||||
|
||||
def cmu_mocap_35_walk_jog():
|
||||
def cmu_mocap_35_walk_jog(data_set='cmu_mocap'):
|
||||
"""Load CMU subject 35's walking and jogging motions, the same data that was used by Taylor, Roweis and Hinton at NIPS 2007. but without their preprocessing. Also used by Lawrence at AISTATS 2007."""
|
||||
train_motions = ['01', '02', '03', '04', '05', '06',
|
||||
'07', '08', '09', '10', '11', '12',
|
||||
|
|
@ -709,7 +724,7 @@ def cmu_mocap_35_walk_jog():
|
|||
'20', '21', '22', '23', '24', '25',
|
||||
'26', '28', '30', '31', '32', '33', '34']
|
||||
test_motions = ['18', '29']
|
||||
data = cmu_mocap('35', train_motions, test_motions, sample_every=4)
|
||||
data = cmu_mocap('35', train_motions, test_motions, sample_every=4, data_set=data_set)
|
||||
data['info'] = "Walk and jog data from CMU data base subject 35. As used in Tayor, Roweis and Hinton at NIPS 2007, but without their pre-processing (i.e. as used by Lawrence at AISTATS 2007). It consists of " + data['info']
|
||||
return data
|
||||
|
||||
|
|
@ -721,7 +736,7 @@ def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4, data_set=
|
|||
# Make sure the data is downloaded.
|
||||
all_motions = train_motions + test_motions
|
||||
resource = cmu_urls_files(([subject], [all_motions]))
|
||||
data_resources[data_set] = data_resources['cmu_mocap_full']
|
||||
data_resources[data_set] = data_resources['cmu_mocap_full'].copy()
|
||||
data_resources[data_set]['files'] = resource['files']
|
||||
data_resources[data_set]['urls'] = resource['urls']
|
||||
if resource['urls']:
|
||||
|
|
|
|||
|
|
@ -67,14 +67,14 @@ class tree:
|
|||
for i in range(len(self.vertices)):
|
||||
if self.vertices[i].id == id:
|
||||
return i
|
||||
raise Error, 'Reverse look up of id failed.'
|
||||
raise ValueError('Reverse look up of id failed.')
|
||||
|
||||
def get_index_by_name(self, name):
|
||||
"""Give the index associated with a given vertex name."""
|
||||
for i in range(len(self.vertices)):
|
||||
if self.vertices[i].name == name:
|
||||
return i
|
||||
raise Error, 'Reverse look up of name failed.'
|
||||
raise ValueError('Reverse look up of name failed.')
|
||||
|
||||
def order_vertices(self):
|
||||
"""Order vertices in the graph such that parents always have a lower index than children."""
|
||||
|
|
@ -433,6 +433,8 @@ class acclaim_skeleton(skeleton):
|
|||
lin = self.read_line(fid)
|
||||
while lin != ':DEGREES':
|
||||
lin = self.read_line(fid)
|
||||
if lin == '':
|
||||
raise ValueError('Could not find :DEGREES in ' + fid.name)
|
||||
|
||||
counter = 0
|
||||
lin = self.read_line(fid)
|
||||
|
|
@ -443,9 +445,9 @@ class acclaim_skeleton(skeleton):
|
|||
if frame_no:
|
||||
counter += 1
|
||||
if counter != frame_no:
|
||||
raise Error, 'Unexpected frame number.'
|
||||
raise ValueError('Unexpected frame number.')
|
||||
else:
|
||||
raise Error, 'Single bone name ...'
|
||||
raise ValueError('Single bone name ...')
|
||||
else:
|
||||
ind = self.get_index_by_name(parts[0])
|
||||
bones[ind].append(np.array([float(channel) for channel in parts[1:]]))
|
||||
|
|
@ -573,7 +575,7 @@ class acclaim_skeleton(skeleton):
|
|||
return
|
||||
lin = self.read_line(fid)
|
||||
else:
|
||||
raise Error, 'Unrecognised file format'
|
||||
raise ValueError('Unrecognised file format')
|
||||
self.finalize()
|
||||
|
||||
def read_units(self, fid):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue