Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Conflicts:
	GPy/core/fitc.py
This commit is contained in:
Ricardo 2013-06-05 16:37:57 +01:00
commit c774432fee
56 changed files with 783 additions and 807 deletions

View file

@ -14,7 +14,7 @@ class FITC(SparseGP):
sparse FITC approximation
:param X: inputs
:type X: np.ndarray (N x Q)
:type X: np.ndarray (num_data x Q)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel (covariance function). See link kernels
@ -57,7 +57,7 @@ class FITC(SparseGP):
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N)))
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
@ -113,7 +113,7 @@ class FITC(SparseGP):
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3):
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
@ -137,14 +137,14 @@ class FITC(SparseGP):
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
@ -154,7 +154,7 @@ class FITC(SparseGP):
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D
@ -204,8 +204,8 @@ class FITC(SparseGP):
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T
# C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)

View file

@ -33,8 +33,8 @@ class GP(GPBase):
self._set_params(self._get_params())
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
self.likelihood._set_params(p[self.kern.Nparam_transformed():])
self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
self.likelihood._set_params(p[self.kern.num_params_transformed():])
self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix
@ -46,12 +46,12 @@ class GP(GPBase):
#alpha = np.dot(self.Ki, self.likelihood.Y)
alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.input_dim * self.Ki)
self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki)
else:
#tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.input_dim * self.Ki)
self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))

View file

@ -1,24 +1,24 @@
import numpy as np
import model
from .. import kern
from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
import pylab as pb
from GPy.core.model import Model
class GPBase(model.model):
class GPBase(Model):
"""
Gaussian Process model for holding shared behaviour between
Gaussian Process Model for holding shared behaviour between
sparse_GP and GP models
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
self.X = X
assert len(self.X.shape) == 2
self.N, self.input_dim = self.X.shape
self.num_data, self.input_dim = self.X.shape
assert isinstance(kernel, kern.kern)
self.kern = kernel
self.likelihood = likelihood
assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.output_dim = self.likelihood.data.shape
self.num_data, self.output_dim = self.likelihood.data.shape
if normalize_X:
self._Xmean = X.mean(0)[None, :]
@ -28,7 +28,7 @@ class GPBase(model.model):
self._Xmean = np.zeros((1, self.input_dim))
self._Xstd = np.ones((1, self.input_dim))
model.model.__init__(self)
Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
@ -84,8 +84,8 @@ class GPBase(model.model):
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
else:
@ -94,9 +94,9 @@ class GPBase(model.model):
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None):
"""
TODO: Docstrings!
:param levels: for 2D plotting, the number of contour levels to use
if ax is None, create a new figure
"""
# TODO include samples
if which_data == 'all':
@ -111,7 +111,7 @@ class GPBase(model.model):
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
@ -122,13 +122,13 @@ class GPBase(model.model):
elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
Yf = self.likelihood.Y.flatten()
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])

View file

@ -6,37 +6,32 @@ from .. import likelihoods
from ..inference import optimization
from ..util.linalg import jitchol
from GPy.util.misc import opt_wrapper
from parameterised import parameterised
from scipy import optimize
from parameterised import Parameterised
import multiprocessing as mp
import numpy as np
import priors
import re
import sys
import pdb
from GPy.core.domains import POSITIVE, REAL
# import numdifftools as ndt
class model(parameterised):
class Model(Parameterised):
def __init__(self):
parameterised.__init__(self)
Parameterised.__init__(self)
self.priors = None
self.optimization_runs = []
self.sampling_runs = []
self.preferred_optimizer = 'scg'
#self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
# self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
raise NotImplementedError, "this needs to be implemented to use the Model class"
def _set_params(self, x):
raise NotImplementedError, "this needs to be implemented to use the model class"
raise NotImplementedError, "this needs to be implemented to use the Model class"
def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
raise NotImplementedError, "this needs to be implemented to use the Model class"
def _log_likelihood_gradients(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
raise NotImplementedError, "this needs to be implemented to use the Model class"
def set_prior(self, regexp, what):
"""
Sets priors on the model parameters.
Sets priors on the Model parameters.
Arguments
---------
@ -65,7 +60,7 @@ class model(parameterised):
if len(tie_matches) > 1:
raise ValueError, "cannot place Prior across multiple ties"
elif len(tie_matches) == 1:
which = which[:1] # just place a Prior object on the first parameter
which = which[:1] # just place a Prior object on the first parameter
# check constraints are okay
@ -95,7 +90,7 @@ class model(parameterised):
def get_gradient(self, name, return_names=False):
"""
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
Get Model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
"""
matches = self.grep_param_names(name)
if len(matches):
@ -135,7 +130,7 @@ class model(parameterised):
def randomize(self):
"""
Randomize the model.
Randomize the Model.
Make this draw from the Prior if one exists, else draw from N(0,1)
"""
# first take care of all parameters (from N(0,1))
@ -147,16 +142,16 @@ class model(parameterised):
if self.priors is not None:
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
self._set_params(x)
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
"""
Perform random restarts of the model, and set the model to the best
Perform random restarts of the Model, and set the Model to the best
seen solution.
If the robust flag is set, exceptions raised during optimizations will
be handled silently. If _all_ runs fail, the model is reset to the
be handled silently. If _all_ runs fail, the Model is reset to the
existing parameter values.
Notes
@ -179,19 +174,19 @@ class model(parameterised):
try:
jobs = []
pool = mp.Pool(processes=num_processes)
for i in range(Nrestarts):
for i in range(num_restarts):
self.randomize()
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job)
pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete
pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete
except KeyboardInterrupt:
print "Ctrl+c received, terminating and joining pool."
pool.terminate()
pool.join()
for i in range(Nrestarts):
for i in range(num_restarts):
try:
if not parallel:
self.randomize()
@ -200,10 +195,10 @@ class model(parameterised):
self.optimization_runs.append(jobs[i].get())
if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
except Exception as e:
if robust:
print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
else:
raise e
@ -218,11 +213,11 @@ class model(parameterised):
Ensure that any variables which should clearly be positive have been constrained somehow.
"""
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
param_names = self._get_param_names()
# param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices()
to_make_positive = []
for s in positive_strings:
for i in self.grep_param_names(".*"+s):
for i in self.grep_param_names(".*" + s):
if not (i in currently_constrained):
to_make_positive.append(i)
if len(to_make_positive):
@ -240,18 +235,18 @@ class model(parameterised):
Gets the gradients from the likelihood and the priors.
"""
self._set_params_transformed(x)
obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
return obj_grads
def objective_and_gradients(self, x):
self._set_params_transformed(x)
obj_f = -self.log_likelihood() - self.log_prior()
obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
Optimize the Model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:
:max_f_eval: maximum number of function evaluations
@ -274,7 +269,7 @@ class model(parameterised):
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable
sgd.run()
self.optimization_runs.append(sgd)
@ -291,7 +286,7 @@ class model(parameterised):
def f(x):
self._set_params(x)
return self.log_likelihood()
h = ndt.Hessian(f)
h = ndt.Hessian(f) # @UndefinedVariable
A = -h(x)
self._set_params(x)
# check for almost zero components on the diagonal which screw up the cholesky
@ -300,7 +295,7 @@ class model(parameterised):
return A
def Laplace_evidence(self):
"""Returns an estiamte of the model evidence based on the Laplace approximation.
"""Returns an estimate of the Model evidence based on the Laplace approximation.
Uses a numerical estimate of the hessian if none is available analytically"""
A = self.Laplace_covariance()
try:
@ -310,12 +305,12 @@ class model(parameterised):
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
def __str__(self):
s = parameterised.__str__(self).split('\n')
s = Parameterised.__str__(self).split('\n')
# add priors to the string
if self.priors is not None:
strs = [str(p) if p is not None else '' for p in self.priors]
else:
strs = ['']*len(self._get_params())
strs = [''] * len(self._get_params())
width = np.array(max([len(p) for p in strs] + [5])) + 4
log_like = self.log_likelihood()
@ -336,7 +331,7 @@ class model(parameterised):
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
"""
Check the gradient of the model by comparing to a numerical estimate.
Check the gradient of the Model by comparing to a numerical estimate.
If the verbose flag is passed, individual components are tested (and printed)
:param verbose: If True, print a "full" checking of each parameter
@ -389,7 +384,7 @@ class model(parameterised):
param_list = range(len(x))
else:
param_list = self.grep_param_names(target_param, transformed=True, search=True)
if not param_list:
if not np.any(param_list):
print "No free parameters to check"
return
@ -419,15 +414,15 @@ class model(parameterised):
def input_sensitivity(self):
"""
return an array describing the sesitivity of the model to each input
return an array describing the sensitivity of the Model to each input
NB. Right now, we're basing this on the lengthscales (or
variances) of the kernel. TODO: proper sensitivity analysis
where we integrate across the model inputs and evaluate the
effect on the variance of the model output. """
where we integrate across the Model inputs and evaluate the
effect on the variance of the Model output. """
if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel"
raise ValueError, "this Model has no kernel"
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
if (not len(k) == 1) or (not k[0].ARD):
@ -474,8 +469,8 @@ class model(parameterised):
ll_change = new_ll - last_ll
if ll_change < 0:
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore Model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True
else:

View file

@ -6,12 +6,10 @@ import numpy as np
import re
import copy
import cPickle
import os
from ..util.squashers import sigmoid
import warnings
import transformations
class parameterised(object):
class Parameterised(object):
def __init__(self):
"""
This is the base class for model and kernel. Mostly just handles tieing and constraining of parameters
@ -36,7 +34,7 @@ class parameterised(object):
"""
Returns a **copy** of parameters in non transformed space
:see_also: :py:func:`GPy.core.parameterised.params_transformed`
:see_also: :py:func:`GPy.core.Parameterised.params_transformed`
"""
return self._get_params()
@ -49,7 +47,7 @@ class parameterised(object):
"""
Returns a **copy** of parameters in transformed space
:see_also: :py:func:`GPy.core.parameterised.params`
:see_also: :py:func:`GPy.core.Parameterised.params`
"""
return self._get_params_transformed()
@ -113,7 +111,7 @@ class parameterised(object):
if hasattr(self, 'prior'):
pass
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
def untie_everything(self):
"""Unties all parameters by setting tied_indices to an empty list."""
@ -145,7 +143,7 @@ class parameterised(object):
else:
return np.nonzero([regexp.match(name) for name in names])[0]
def Nparam_transformed(self):
def num_params_transformed(self):
removed = 0
for tie in self.tied_indices:
removed += tie.size - 1
@ -159,18 +157,18 @@ class parameterised(object):
"""Unconstrain matching parameters. does not untie parameters"""
matches = self.grep_param_names(regexp)
#tranformed contraints:
# transformed constraints:
for match in matches:
self.constrained_indices = [i[i<>match] for i in self.constrained_indices]
self.constrained_indices = [i[i <> match] for i in self.constrained_indices]
#remove empty constraints
tmp = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)])
# remove empty constraints
tmp = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)])
if tmp:
self.constrained_indices, self.constraints = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)])
self.constrained_indices, self.constraints = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)])
self.constrained_indices, self.constraints = list(self.constrained_indices), list(self.constraints)
# fixed:
self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices,values in zip(self.fixed_indices,self.fixed_values)]
self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices, values in zip(self.fixed_indices, self.fixed_values)]
self.fixed_indices = [np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices in self.fixed_indices]
# remove empty elements
@ -189,7 +187,7 @@ class parameterised(object):
""" Set positive constraints. """
self.constrain(regexp, transformations.logexp())
def constrain_bounded(self, regexp,lower, upper):
def constrain_bounded(self, regexp, lower, upper):
""" Set bounded constraints. """
self.constrain(regexp, transformations.logistic(lower, upper))
@ -199,8 +197,8 @@ class parameterised(object):
else:
return np.empty(shape=(0,))
def constrain(self,regexp,transform):
assert isinstance(transform,transformations.transformation)
def constrain(self, regexp, transform):
assert isinstance(transform, transformations.transformation)
matches = self.grep_param_names(regexp)
overlap = set(matches).intersection(set(self.all_constrained_indices()))
@ -251,7 +249,7 @@ class parameterised(object):
def _get_params_transformed(self):
"""use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
x = self._get_params()
[np.put(x,i,t.finv(x[i])) for i,t in zip(self.constrained_indices,self.constraints)]
[np.put(x, i, t.finv(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
to_remove = self.fixed_indices + [t[1:] for t in self.tied_indices]
if len(to_remove):
@ -263,7 +261,7 @@ class parameterised(object):
""" takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
self._set_params(self._untransform_params(x))
def _untransform_params(self,x):
def _untransform_params(self, x):
"""
The transformation required for _set_params_transformed.
@ -290,9 +288,9 @@ class parameterised(object):
[np.put(xx, i, v) for i, v in zip(self.fixed_indices, self.fixed_values)]
[np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]
[np.put(xx,i,t.f(xx[i])) for i,t in zip(self.constrained_indices, self.constraints)]
if hasattr(self,'debug'):
stop
[np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)]
if hasattr(self, 'debug'):
stop # @UndefinedVariable
return xx
@ -316,7 +314,7 @@ class parameterised(object):
remove = np.hstack((remove, np.hstack(self.fixed_indices)))
# add markers to show that some variables are constrained
for i,t in zip(self.constrained_indices,self.constraints):
for i, t in zip(self.constrained_indices, self.constraints):
for ii in i:
n[ii] = n[ii] + t.__str__()
@ -333,10 +331,10 @@ class parameterised(object):
if not N:
return "This object has no free parameters."
header = ['Name', 'Value', 'Constraints', 'Ties']
values = self._get_params() # map(str,self._get_params())
values = self._get_params() # map(str,self._get_params())
# sort out the constraints
constraints = [''] * len(names)
for i,t in zip(self.constrained_indices,self.constraints):
for i, t in zip(self.constrained_indices, self.constraints):
for ii in i:
constraints[ii] = t.__str__()
for i in self.fixed_indices:
@ -354,7 +352,7 @@ class parameterised(object):
max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
columns = cols.sum()
# columns = cols.sum()
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])

View file

@ -13,13 +13,13 @@ class SparseGP(GPBase):
Variational sparse GP model
:param X: inputs
:type X: np.ndarray (N x input_dim)
:type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel : the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x input_dim) | None
:type X_variance: np.ndarray (num_data x input_dim) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
@ -69,7 +69,7 @@ class SparseGP(GPBase):
# The rather complex computations of self.A
if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0)
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
evals, evecs = linalg.eigh(psi2_beta)
@ -77,7 +77,7 @@ class SparseGP(GPBase):
tmp = evecs * np.sqrt(clipped_evals)
else:
if self.likelihood.is_heteroscedastic:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N)))
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.num_data)))
else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
@ -99,28 +99,28 @@ class SparseGP(GPBase):
# Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.input_dim * np.eye(self.num_inducing) + tmp)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
tmp = -0.5 * self.DBi_plus_BiPBi
tmp += -0.5 * self.B * self.input_dim
tmp += self.input_dim * np.eye(self.num_inducing)
tmp += -0.5 * self.B * self.output_dim
tmp += self.output_dim * np.eye(self.num_inducing)
self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
self.dL_dpsi0 = -0.5 * self.input_dim * (self.likelihood.precision * np.ones([self.N, 1])).flatten()
self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.input_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N))
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.num_data))
self.dL_dpsi2 = None
else:
dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
if self.has_uncertain_inputs:
# repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0)
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
else:
# subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
@ -135,26 +135,26 @@ class SparseGP(GPBase):
raise NotImplementedError, "heteroscedatic derivates not implemented"
else:
# likelihood is not heteroscedastic
self.partial_for_likelihood = -0.5 * self.N * self.input_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.input_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else:
A = -0.5 * self.N * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + B + C + D + self.likelihood.Z
def _set_params(self, p):
self.Z = p[:self.num_inducing * self.output_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam])
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:])
self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
self._compute_kernel_matrices()
self._computations()