mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-08 15:05:15 +02:00
Merge branch 'devel' of github.com:SheffieldML/GPy into devel
Conflicts: GPy/core/fitc.py
This commit is contained in:
commit
c774432fee
56 changed files with 783 additions and 807 deletions
|
|
@ -14,7 +14,7 @@ class FITC(SparseGP):
|
|||
sparse FITC approximation
|
||||
|
||||
:param X: inputs
|
||||
:type X: np.ndarray (N x Q)
|
||||
:type X: np.ndarray (num_data x Q)
|
||||
:param likelihood: a likelihood instance, containing the observed data
|
||||
:type likelihood: GPy.likelihood.(Gaussian | EP)
|
||||
:param kernel : the kernel (covariance function). See link kernels
|
||||
|
|
@ -57,7 +57,7 @@ class FITC(SparseGP):
|
|||
self.V_star = self.beta_star * self.likelihood.Y
|
||||
|
||||
# The rather complex computations of self.A
|
||||
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
|
||||
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
|
||||
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
|
||||
self.A = tdot(tmp)
|
||||
|
||||
|
|
@ -113,7 +113,7 @@ class FITC(SparseGP):
|
|||
self._dpsi1_dX_jkj = 0
|
||||
self._dpsi1_dtheta_jkj = 0
|
||||
|
||||
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
|
||||
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3):
|
||||
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
|
||||
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
|
||||
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
|
||||
|
|
@ -137,14 +137,14 @@ class FITC(SparseGP):
|
|||
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
|
||||
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
|
||||
|
||||
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
|
||||
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
|
||||
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
|
||||
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
|
||||
|
||||
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
|
||||
alpha = mdot(LBiLmipsi1,self.V_star)
|
||||
alpha_ = mdot(LBiLmipsi1.T,alpha)
|
||||
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
|
||||
dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_**2 * dbstar_dnoise )
|
||||
|
||||
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
|
||||
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
|
||||
|
|
@ -154,7 +154,7 @@ class FITC(SparseGP):
|
|||
|
||||
def log_likelihood(self):
|
||||
""" Compute the (lower bound on the) log marginal likelihood """
|
||||
A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
|
||||
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
|
||||
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB))))
|
||||
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
|
||||
return A + C + D
|
||||
|
|
@ -204,8 +204,8 @@ class FITC(SparseGP):
|
|||
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
|
||||
|
||||
# Ci = I + (RPT0)Di(RPT0).T
|
||||
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
|
||||
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
|
||||
# C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T
|
||||
# = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T
|
||||
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
|
||||
# = I - V.T * V
|
||||
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
|
||||
|
|
|
|||
|
|
@ -33,8 +33,8 @@ class GP(GPBase):
|
|||
self._set_params(self._get_params())
|
||||
|
||||
def _set_params(self, p):
|
||||
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
|
||||
self.likelihood._set_params(p[self.kern.Nparam_transformed():])
|
||||
self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
|
||||
self.likelihood._set_params(p[self.kern.num_params_transformed():])
|
||||
|
||||
self.K = self.kern.K(self.X)
|
||||
self.K += self.likelihood.covariance_matrix
|
||||
|
|
@ -46,12 +46,12 @@ class GP(GPBase):
|
|||
#alpha = np.dot(self.Ki, self.likelihood.Y)
|
||||
alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
|
||||
|
||||
self.dL_dK = 0.5 * (tdot(alpha) - self.input_dim * self.Ki)
|
||||
self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki)
|
||||
else:
|
||||
#tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
|
||||
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
|
||||
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
|
||||
self.dL_dK = 0.5 * (tmp - self.input_dim * self.Ki)
|
||||
self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
|
||||
|
|
|
|||
|
|
@ -1,24 +1,24 @@
|
|||
import numpy as np
|
||||
import model
|
||||
from .. import kern
|
||||
from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
|
||||
import pylab as pb
|
||||
from GPy.core.model import Model
|
||||
|
||||
class GPBase(model.model):
|
||||
class GPBase(Model):
|
||||
"""
|
||||
Gaussian Process model for holding shared behaviour between
|
||||
Gaussian Process Model for holding shared behaviour between
|
||||
sprase_GP and GP models
|
||||
"""
|
||||
|
||||
def __init__(self, X, likelihood, kernel, normalize_X=False):
|
||||
self.X = X
|
||||
assert len(self.X.shape) == 2
|
||||
self.N, self.input_dim = self.X.shape
|
||||
self.num_data, self.input_dim = self.X.shape
|
||||
assert isinstance(kernel, kern.kern)
|
||||
self.kern = kernel
|
||||
self.likelihood = likelihood
|
||||
assert self.X.shape[0] == self.likelihood.data.shape[0]
|
||||
self.N, self.output_dim = self.likelihood.data.shape
|
||||
self.num_data, self.output_dim = self.likelihood.data.shape
|
||||
|
||||
if normalize_X:
|
||||
self._Xmean = X.mean(0)[None, :]
|
||||
|
|
@ -28,7 +28,7 @@ class GPBase(model.model):
|
|||
self._Xmean = np.zeros((1, self.input_dim))
|
||||
self._Xstd = np.ones((1, self.input_dim))
|
||||
|
||||
model.model.__init__(self)
|
||||
Model.__init__(self)
|
||||
|
||||
# All leaf nodes should call self._set_params(self._get_params()) at
|
||||
# the end
|
||||
|
|
@ -84,8 +84,8 @@ class GPBase(model.model):
|
|||
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
|
||||
m, v = self._raw_predict(Xnew, which_parts=which_parts)
|
||||
m = m.reshape(resolution, resolution).T
|
||||
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
|
||||
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
|
||||
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
|
||||
ax.set_xlim(xmin[0], xmax[0])
|
||||
ax.set_ylim(xmin[1], xmax[1])
|
||||
else:
|
||||
|
|
@ -94,9 +94,9 @@ class GPBase(model.model):
|
|||
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None):
|
||||
"""
|
||||
TODO: Docstrings!
|
||||
|
||||
:param levels: for 2D plotting, the number of contour levels to use
|
||||
is ax is None, create a new figure
|
||||
|
||||
"""
|
||||
# TODO include samples
|
||||
if which_data == 'all':
|
||||
|
|
@ -111,7 +111,7 @@ class GPBase(model.model):
|
|||
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
|
||||
|
||||
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
|
||||
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
||||
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
||||
for d in range(m.shape[1]):
|
||||
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
|
||||
ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
|
||||
|
|
@ -122,13 +122,13 @@ class GPBase(model.model):
|
|||
|
||||
elif self.X.shape[1] == 2: # FIXME
|
||||
resolution = resolution or 50
|
||||
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
|
||||
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
|
||||
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
|
||||
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
||||
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
||||
m = m.reshape(resolution, resolution).T
|
||||
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
|
||||
Yf = self.likelihood.Y.flatten()
|
||||
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
||||
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
|
||||
ax.set_xlim(xmin[0], xmax[0])
|
||||
ax.set_ylim(xmin[1], xmax[1])
|
||||
|
||||
|
|
|
|||
|
|
@ -6,37 +6,32 @@ from .. import likelihoods
|
|||
from ..inference import optimization
|
||||
from ..util.linalg import jitchol
|
||||
from GPy.util.misc import opt_wrapper
|
||||
from parameterised import parameterised
|
||||
from scipy import optimize
|
||||
from parameterised import Parameterised
|
||||
import multiprocessing as mp
|
||||
import numpy as np
|
||||
import priors
|
||||
import re
|
||||
import sys
|
||||
import pdb
|
||||
from GPy.core.domains import POSITIVE, REAL
|
||||
# import numdifftools as ndt
|
||||
|
||||
class model(parameterised):
|
||||
class Model(Parameterised):
|
||||
def __init__(self):
|
||||
parameterised.__init__(self)
|
||||
Parameterised.__init__(self)
|
||||
self.priors = None
|
||||
self.optimization_runs = []
|
||||
self.sampling_runs = []
|
||||
self.preferred_optimizer = 'scg'
|
||||
#self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
|
||||
# self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
|
||||
def _get_params(self):
|
||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||
raise NotImplementedError, "this needs to be implemented to use the Model class"
|
||||
def _set_params(self, x):
|
||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||
raise NotImplementedError, "this needs to be implemented to use the Model class"
|
||||
def log_likelihood(self):
|
||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||
raise NotImplementedError, "this needs to be implemented to use the Model class"
|
||||
def _log_likelihood_gradients(self):
|
||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||
raise NotImplementedError, "this needs to be implemented to use the Model class"
|
||||
|
||||
def set_prior(self, regexp, what):
|
||||
"""
|
||||
Sets priors on the model parameters.
|
||||
Sets priors on the Model parameters.
|
||||
|
||||
Arguments
|
||||
---------
|
||||
|
|
@ -65,7 +60,7 @@ class model(parameterised):
|
|||
if len(tie_matches) > 1:
|
||||
raise ValueError, "cannot place Prior across multiple ties"
|
||||
elif len(tie_matches) == 1:
|
||||
which = which[:1] # just place a Prior object on the first parameter
|
||||
which = which[:1] # just place a Prior object on the first parameter
|
||||
|
||||
|
||||
# check constraints are okay
|
||||
|
|
@ -95,7 +90,7 @@ class model(parameterised):
|
|||
|
||||
def get_gradient(self, name, return_names=False):
|
||||
"""
|
||||
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
|
||||
Get Model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
|
||||
"""
|
||||
matches = self.grep_param_names(name)
|
||||
if len(matches):
|
||||
|
|
@ -135,7 +130,7 @@ class model(parameterised):
|
|||
|
||||
def randomize(self):
|
||||
"""
|
||||
Randomize the model.
|
||||
Randomize the Model.
|
||||
Make this draw from the Prior if one exists, else draw from N(0,1)
|
||||
"""
|
||||
# first take care of all parameters (from N(0,1))
|
||||
|
|
@ -147,16 +142,16 @@ class model(parameterised):
|
|||
if self.priors is not None:
|
||||
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
|
||||
self._set_params(x)
|
||||
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
|
||||
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
|
||||
|
||||
|
||||
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
|
||||
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
|
||||
"""
|
||||
Perform random restarts of the model, and set the model to the best
|
||||
Perform random restarts of the Model, and set the Model to the best
|
||||
seen solution.
|
||||
|
||||
If the robust flag is set, exceptions raised during optimizations will
|
||||
be handled silently. If _all_ runs fail, the model is reset to the
|
||||
be handled silently. If _all_ runs fail, the Model is reset to the
|
||||
existing parameter values.
|
||||
|
||||
Notes
|
||||
|
|
@ -179,19 +174,19 @@ class model(parameterised):
|
|||
try:
|
||||
jobs = []
|
||||
pool = mp.Pool(processes=num_processes)
|
||||
for i in range(Nrestarts):
|
||||
for i in range(num_restarts):
|
||||
self.randomize()
|
||||
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
|
||||
jobs.append(job)
|
||||
|
||||
pool.close() # signal that no more data coming in
|
||||
pool.join() # wait for all the tasks to complete
|
||||
pool.close() # signal that no more data coming in
|
||||
pool.join() # wait for all the tasks to complete
|
||||
except KeyboardInterrupt:
|
||||
print "Ctrl+c received, terminating and joining pool."
|
||||
pool.terminate()
|
||||
pool.join()
|
||||
|
||||
for i in range(Nrestarts):
|
||||
for i in range(num_restarts):
|
||||
try:
|
||||
if not parallel:
|
||||
self.randomize()
|
||||
|
|
@ -200,10 +195,10 @@ class model(parameterised):
|
|||
self.optimization_runs.append(jobs[i].get())
|
||||
|
||||
if verbose:
|
||||
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
|
||||
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
|
||||
except Exception as e:
|
||||
if robust:
|
||||
print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
|
||||
print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
|
||||
else:
|
||||
raise e
|
||||
|
||||
|
|
@ -218,11 +213,11 @@ class model(parameterised):
|
|||
Ensure that any variables which should clearly be positive have been constrained somehow.
|
||||
"""
|
||||
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
|
||||
param_names = self._get_param_names()
|
||||
# param_names = self._get_param_names()
|
||||
currently_constrained = self.all_constrained_indices()
|
||||
to_make_positive = []
|
||||
for s in positive_strings:
|
||||
for i in self.grep_param_names(".*"+s):
|
||||
for i in self.grep_param_names(".*" + s):
|
||||
if not (i in currently_constrained):
|
||||
to_make_positive.append(i)
|
||||
if len(to_make_positive):
|
||||
|
|
@ -240,18 +235,18 @@ class model(parameterised):
|
|||
Gets the gradients from the likelihood and the priors.
|
||||
"""
|
||||
self._set_params_transformed(x)
|
||||
obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
return obj_grads
|
||||
|
||||
def objective_and_gradients(self, x):
|
||||
self._set_params_transformed(x)
|
||||
obj_f = -self.log_likelihood() - self.log_prior()
|
||||
obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
return obj_f, obj_grads
|
||||
|
||||
def optimize(self, optimizer=None, start=None, **kwargs):
|
||||
"""
|
||||
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
|
||||
Optimize the Model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
|
||||
kwargs are passed to the optimizer. They can be:
|
||||
|
||||
:max_f_eval: maximum number of function evaluations
|
||||
|
|
@ -274,7 +269,7 @@ class model(parameterised):
|
|||
|
||||
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
|
||||
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
|
||||
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
|
||||
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable
|
||||
sgd.run()
|
||||
self.optimization_runs.append(sgd)
|
||||
|
||||
|
|
@ -291,7 +286,7 @@ class model(parameterised):
|
|||
def f(x):
|
||||
self._set_params(x)
|
||||
return self.log_likelihood()
|
||||
h = ndt.Hessian(f)
|
||||
h = ndt.Hessian(f) # @UndefinedVariable
|
||||
A = -h(x)
|
||||
self._set_params(x)
|
||||
# check for almost zero components on the diagonal which screw up the cholesky
|
||||
|
|
@ -300,7 +295,7 @@ class model(parameterised):
|
|||
return A
|
||||
|
||||
def Laplace_evidence(self):
|
||||
"""Returns an estiamte of the model evidence based on the Laplace approximation.
|
||||
"""Returns an estiamte of the Model evidence based on the Laplace approximation.
|
||||
Uses a numerical estimate of the hessian if none is available analytically"""
|
||||
A = self.Laplace_covariance()
|
||||
try:
|
||||
|
|
@ -310,12 +305,12 @@ class model(parameterised):
|
|||
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
|
||||
|
||||
def __str__(self):
|
||||
s = parameterised.__str__(self).split('\n')
|
||||
s = Parameterised.__str__(self).split('\n')
|
||||
# add priors to the string
|
||||
if self.priors is not None:
|
||||
strs = [str(p) if p is not None else '' for p in self.priors]
|
||||
else:
|
||||
strs = ['']*len(self._get_params())
|
||||
strs = [''] * len(self._get_params())
|
||||
width = np.array(max([len(p) for p in strs] + [5])) + 4
|
||||
|
||||
log_like = self.log_likelihood()
|
||||
|
|
@ -336,7 +331,7 @@ class model(parameterised):
|
|||
|
||||
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
|
||||
"""
|
||||
Check the gradient of the model by comparing to a numerical estimate.
|
||||
Check the gradient of the Model by comparing to a numerical estimate.
|
||||
If the verbose flag is passed, invividual components are tested (and printed)
|
||||
|
||||
:param verbose: If True, print a "full" checking of each parameter
|
||||
|
|
@ -389,7 +384,7 @@ class model(parameterised):
|
|||
param_list = range(len(x))
|
||||
else:
|
||||
param_list = self.grep_param_names(target_param, transformed=True, search=True)
|
||||
if not param_list:
|
||||
if not np.any(param_list):
|
||||
print "No free parameters to check"
|
||||
return
|
||||
|
||||
|
|
@ -419,15 +414,15 @@ class model(parameterised):
|
|||
|
||||
def input_sensitivity(self):
|
||||
"""
|
||||
return an array describing the sesitivity of the model to each input
|
||||
return an array describing the sesitivity of the Model to each input
|
||||
|
||||
NB. Right now, we're basing this on the lengthscales (or
|
||||
variances) of the kernel. TODO: proper sensitivity analysis
|
||||
where we integrate across the model inputs and evaluate the
|
||||
effect on the variance of the model output. """
|
||||
where we integrate across the Model inputs and evaluate the
|
||||
effect on the variance of the Model output. """
|
||||
|
||||
if not hasattr(self, 'kern'):
|
||||
raise ValueError, "this model has no kernel"
|
||||
raise ValueError, "this Model has no kernel"
|
||||
|
||||
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
|
||||
if (not len(k) == 1) or (not k[0].ARD):
|
||||
|
|
@ -474,8 +469,8 @@ class model(parameterised):
|
|||
ll_change = new_ll - last_ll
|
||||
|
||||
if ll_change < 0:
|
||||
self.likelihood = last_approximation # restore previous likelihood approximation
|
||||
self._set_params(last_params) # restore model parameters
|
||||
self.likelihood = last_approximation # restore previous likelihood approximation
|
||||
self._set_params(last_params) # restore Model parameters
|
||||
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
|
||||
stop = True
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -6,12 +6,10 @@ import numpy as np
|
|||
import re
|
||||
import copy
|
||||
import cPickle
|
||||
import os
|
||||
from ..util.squashers import sigmoid
|
||||
import warnings
|
||||
import transformations
|
||||
|
||||
class parameterised(object):
|
||||
class Parameterised(object):
|
||||
def __init__(self):
|
||||
"""
|
||||
This is the base class for model and kernel. Mostly just handles tieing and constraining of parameters
|
||||
|
|
@ -36,7 +34,7 @@ class parameterised(object):
|
|||
"""
|
||||
Returns a **copy** of parameters in non transformed space
|
||||
|
||||
:see_also: :py:func:`GPy.core.parameterised.params_transformed`
|
||||
:see_also: :py:func:`GPy.core.Parameterised.params_transformed`
|
||||
"""
|
||||
return self._get_params()
|
||||
|
||||
|
|
@ -49,7 +47,7 @@ class parameterised(object):
|
|||
"""
|
||||
Returns a **copy** of parameters in transformed space
|
||||
|
||||
:see_also: :py:func:`GPy.core.parameterised.params`
|
||||
:see_also: :py:func:`GPy.core.Parameterised.params`
|
||||
"""
|
||||
return self._get_params_transformed()
|
||||
|
||||
|
|
@ -113,7 +111,7 @@ class parameterised(object):
|
|||
if hasattr(self, 'prior'):
|
||||
pass
|
||||
|
||||
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
|
||||
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
|
||||
|
||||
def untie_everything(self):
|
||||
"""Unties all parameters by setting tied_indices to an empty list."""
|
||||
|
|
@ -145,7 +143,7 @@ class parameterised(object):
|
|||
else:
|
||||
return np.nonzero([regexp.match(name) for name in names])[0]
|
||||
|
||||
def Nparam_transformed(self):
|
||||
def num_params_transformed(self):
|
||||
removed = 0
|
||||
for tie in self.tied_indices:
|
||||
removed += tie.size - 1
|
||||
|
|
@ -159,18 +157,18 @@ class parameterised(object):
|
|||
"""Unconstrain matching parameters. does not untie parameters"""
|
||||
matches = self.grep_param_names(regexp)
|
||||
|
||||
#tranformed contraints:
|
||||
# tranformed contraints:
|
||||
for match in matches:
|
||||
self.constrained_indices = [i[i<>match] for i in self.constrained_indices]
|
||||
self.constrained_indices = [i[i <> match] for i in self.constrained_indices]
|
||||
|
||||
#remove empty constraints
|
||||
tmp = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)])
|
||||
# remove empty constraints
|
||||
tmp = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)])
|
||||
if tmp:
|
||||
self.constrained_indices, self.constraints = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)])
|
||||
self.constrained_indices, self.constraints = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)])
|
||||
self.constrained_indices, self.constraints = list(self.constrained_indices), list(self.constraints)
|
||||
|
||||
# fixed:
|
||||
self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices,values in zip(self.fixed_indices,self.fixed_values)]
|
||||
self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices, values in zip(self.fixed_indices, self.fixed_values)]
|
||||
self.fixed_indices = [np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices in self.fixed_indices]
|
||||
|
||||
# remove empty elements
|
||||
|
|
@ -189,7 +187,7 @@ class parameterised(object):
|
|||
""" Set positive constraints. """
|
||||
self.constrain(regexp, transformations.logexp())
|
||||
|
||||
def constrain_bounded(self, regexp,lower, upper):
|
||||
def constrain_bounded(self, regexp, lower, upper):
|
||||
""" Set bounded constraints. """
|
||||
self.constrain(regexp, transformations.logistic(lower, upper))
|
||||
|
||||
|
|
@ -199,8 +197,8 @@ class parameterised(object):
|
|||
else:
|
||||
return np.empty(shape=(0,))
|
||||
|
||||
def constrain(self,regexp,transform):
|
||||
assert isinstance(transform,transformations.transformation)
|
||||
def constrain(self, regexp, transform):
|
||||
assert isinstance(transform, transformations.transformation)
|
||||
|
||||
matches = self.grep_param_names(regexp)
|
||||
overlap = set(matches).intersection(set(self.all_constrained_indices()))
|
||||
|
|
@ -251,7 +249,7 @@ class parameterised(object):
|
|||
def _get_params_transformed(self):
|
||||
"""use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
|
||||
x = self._get_params()
|
||||
[np.put(x,i,t.finv(x[i])) for i,t in zip(self.constrained_indices,self.constraints)]
|
||||
[np.put(x, i, t.finv(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
|
||||
|
||||
to_remove = self.fixed_indices + [t[1:] for t in self.tied_indices]
|
||||
if len(to_remove):
|
||||
|
|
@ -263,7 +261,7 @@ class parameterised(object):
|
|||
""" takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
|
||||
self._set_params(self._untransform_params(x))
|
||||
|
||||
def _untransform_params(self,x):
|
||||
def _untransform_params(self, x):
|
||||
"""
|
||||
The transformation required for _set_params_transformed.
|
||||
|
||||
|
|
@ -290,9 +288,9 @@ class parameterised(object):
|
|||
[np.put(xx, i, v) for i, v in zip(self.fixed_indices, self.fixed_values)]
|
||||
[np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]
|
||||
|
||||
[np.put(xx,i,t.f(xx[i])) for i,t in zip(self.constrained_indices, self.constraints)]
|
||||
if hasattr(self,'debug'):
|
||||
stop
|
||||
[np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)]
|
||||
if hasattr(self, 'debug'):
|
||||
stop # @UndefinedVariable
|
||||
|
||||
return xx
|
||||
|
||||
|
|
@ -316,7 +314,7 @@ class parameterised(object):
|
|||
remove = np.hstack((remove, np.hstack(self.fixed_indices)))
|
||||
|
||||
# add markers to show that some variables are constrained
|
||||
for i,t in zip(self.constrained_indices,self.constraints):
|
||||
for i, t in zip(self.constrained_indices, self.constraints):
|
||||
for ii in i:
|
||||
n[ii] = n[ii] + t.__str__()
|
||||
|
||||
|
|
@ -333,10 +331,10 @@ class parameterised(object):
|
|||
if not N:
|
||||
return "This object has no free parameters."
|
||||
header = ['Name', 'Value', 'Constraints', 'Ties']
|
||||
values = self._get_params() # map(str,self._get_params())
|
||||
values = self._get_params() # map(str,self._get_params())
|
||||
# sort out the constraints
|
||||
constraints = [''] * len(names)
|
||||
for i,t in zip(self.constrained_indices,self.constraints):
|
||||
for i, t in zip(self.constrained_indices, self.constraints):
|
||||
for ii in i:
|
||||
constraints[ii] = t.__str__()
|
||||
for i in self.fixed_indices:
|
||||
|
|
@ -354,7 +352,7 @@ class parameterised(object):
|
|||
max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
|
||||
max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
|
||||
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
|
||||
columns = cols.sum()
|
||||
# columns = cols.sum()
|
||||
|
||||
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
|
||||
header_string = map(lambda x: '|'.join(x), [header_string])
|
||||
|
|
|
|||
|
|
@ -13,13 +13,13 @@ class SparseGP(GPBase):
|
|||
Variational sparse GP model
|
||||
|
||||
:param X: inputs
|
||||
:type X: np.ndarray (N x input_dim)
|
||||
:type X: np.ndarray (num_data x input_dim)
|
||||
:param likelihood: a likelihood instance, containing the observed data
|
||||
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
|
||||
:param kernel : the kernel (covariance function). See link kernels
|
||||
:type kernel: a GPy.kern.kern instance
|
||||
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
|
||||
:type X_variance: np.ndarray (N x input_dim) | None
|
||||
:type X_variance: np.ndarray (num_data x input_dim) | None
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (num_inducing x input_dim) | None
|
||||
:param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
|
|
@ -69,7 +69,7 @@ class SparseGP(GPBase):
|
|||
# The rather complex computations of self.A
|
||||
if self.has_uncertain_inputs:
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
|
||||
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
|
||||
else:
|
||||
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
|
||||
evals, evecs = linalg.eigh(psi2_beta)
|
||||
|
|
@ -77,7 +77,7 @@ class SparseGP(GPBase):
|
|||
tmp = evecs * np.sqrt(clipped_evals)
|
||||
else:
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
|
||||
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.num_data)))
|
||||
else:
|
||||
tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
|
||||
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
|
||||
|
|
@ -99,28 +99,28 @@ class SparseGP(GPBase):
|
|||
|
||||
# Compute dL_dKmm
|
||||
tmp = tdot(self._LBi_Lmi_psi1V)
|
||||
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.input_dim * np.eye(self.num_inducing) + tmp)
|
||||
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
|
||||
tmp = -0.5 * self.DBi_plus_BiPBi
|
||||
tmp += -0.5 * self.B * self.input_dim
|
||||
tmp += self.input_dim * np.eye(self.num_inducing)
|
||||
tmp += -0.5 * self.B * self.output_dim
|
||||
tmp += self.output_dim * np.eye(self.num_inducing)
|
||||
self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
|
||||
|
||||
# Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
|
||||
self.dL_dpsi0 = -0.5 * self.input_dim * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
|
||||
self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
|
||||
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
|
||||
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.input_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
|
||||
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
|
||||
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
if self.has_uncertain_inputs:
|
||||
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
|
||||
else:
|
||||
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
|
||||
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.num_data))
|
||||
self.dL_dpsi2 = None
|
||||
else:
|
||||
dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
|
||||
if self.has_uncertain_inputs:
|
||||
# repeat for each of the num_data psi_2 matrices
|
||||
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0)
|
||||
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
|
||||
else:
|
||||
# subsume back into psi1 (==Kmn)
|
||||
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
|
||||
|
|
@ -135,26 +135,26 @@ class SparseGP(GPBase):
|
|||
raise NotImplementedError, "heteroscedatic derivates not implemented"
|
||||
else:
|
||||
# likelihood is not heteroscedastic
|
||||
self.partial_for_likelihood = -0.5 * self.N * self.input_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
|
||||
self.partial_for_likelihood += 0.5 * self.input_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
|
||||
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
|
||||
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
|
||||
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
|
||||
|
||||
def log_likelihood(self):
|
||||
""" Compute the (lower bound on the) log marginal likelihood """
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
|
||||
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
|
||||
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
|
||||
else:
|
||||
A = -0.5 * self.N * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
|
||||
A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
|
||||
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
|
||||
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
|
||||
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
|
||||
return A + B + C + D + self.likelihood.Z
|
||||
|
||||
def _set_params(self, p):
|
||||
self.Z = p[:self.num_inducing * self.output_dim].reshape(self.num_inducing, self.input_dim)
|
||||
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
|
||||
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
|
||||
self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
|
||||
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
|
||||
self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
|
||||
self._compute_kernel_matrices()
|
||||
self._computations()
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue