mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-11 15:15:15 +02:00
Merge branch 'params' of github.com:SheffieldML/GPy into params
Conflicts: GPy/kern/parts/prod.py
This commit is contained in:
commit
91d8890a13
76 changed files with 2023 additions and 1429 deletions
|
|
@ -48,14 +48,13 @@ class GP(Model):
|
|||
if inference_method is None:
|
||||
if isinstance(likelihood, likelihoods.Gaussian):
|
||||
inference_method = exact_gaussian_inference.ExactGaussianInference()
|
||||
else:
|
||||
inference_method = expectation_propagation
|
||||
print "defaulting to ", inference_method, "for latent function inference"
|
||||
else:
|
||||
inference_method = expectation_propagation
|
||||
print "defaulting to ", inference_method, "for latent function inference"
|
||||
self.inference_method = inference_method
|
||||
|
||||
self.add_parameter(self.kern)
|
||||
self.add_parameter(self.likelihood)
|
||||
|
||||
self.parameters_changed()
|
||||
|
||||
def parameters_changed(self):
|
||||
|
|
@ -65,9 +64,6 @@ class GP(Model):
|
|||
def log_likelihood(self):
|
||||
return self._log_marginal_likelihood
|
||||
|
||||
def dL_dtheta_K(self):
|
||||
return self.kern.dK_dtheta(self.posterior.dL_dK, self.X)
|
||||
|
||||
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
|
||||
"""
|
||||
Internal helper function for making predictions, does not account
|
||||
|
|
@ -79,14 +75,17 @@ class GP(Model):
|
|||
|
||||
"""
|
||||
Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
|
||||
LiKx, _ = dtrtrs(self.posterior._woodbury_chol, np.asfortranarray(Kx), lower=1)
|
||||
mu = np.dot(Kx.T, self.posterior._woodbury_vector)
|
||||
#LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
|
||||
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
|
||||
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
||||
if full_cov:
|
||||
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
|
||||
var = Kxx - tdot(LiKx.T)
|
||||
#var = Kxx - tdot(LiKx.T)
|
||||
var = np.dot(Kx.T, WiKx)
|
||||
else:
|
||||
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
|
||||
var = Kxx - np.sum(LiKx*LiKx, 0)
|
||||
#var = Kxx - np.sum(LiKx*LiKx, 0)
|
||||
var = Kxx - np.sum(WiKx*Kx, 0)
|
||||
var = var.reshape(-1, 1)
|
||||
return mu, var
|
||||
|
||||
|
|
@ -185,7 +184,7 @@ class GP(Model):
|
|||
from ..plotting.matplot_dep import models_plots
|
||||
models_plots.plot_fit_f(self,*args,**kwargs)
|
||||
|
||||
def plot(self, *args):
|
||||
def plot(self, *args, **kwargs):
|
||||
"""
|
||||
Plot the posterior of the GP.
|
||||
- In one dimension, the function is plotted with a shaded region
|
||||
|
|
@ -204,7 +203,7 @@ class GP(Model):
|
|||
"""
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import models_plots
|
||||
models_plots.plot_fit(self,*args)
|
||||
models_plots.plot_fit(self,*args,**kwargs)
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -170,15 +170,13 @@ class Model(Parameterized):
|
|||
# first take care of all parameters (from N(0,1))
|
||||
#x = self._get_params_transformed()
|
||||
x = np.random.randn(self.size_transformed)
|
||||
self._set_params_transformed(x)
|
||||
x = self._untransform_params(x)
|
||||
# now draw from prior where possible
|
||||
x = self._get_params()
|
||||
if self.priors is not None:
|
||||
if self.priors is not None and len(self.priors):
|
||||
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
|
||||
self._set_params(x)
|
||||
#self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
|
||||
|
||||
|
||||
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
|
||||
"""
|
||||
Perform random restarts of the model, and set the model to the best
|
||||
|
|
@ -260,19 +258,21 @@ class Model(Parameterized):
|
|||
these terms are present in the name the parameter is
|
||||
constrained positive.
|
||||
"""
|
||||
raise DeprecationWarning, 'parameters now have default constraints'
|
||||
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
|
||||
# param_names = self._get_param_names()
|
||||
for s in positive_strings:
|
||||
paramlist = self.grep_param_names(".*"+s)
|
||||
for param in paramlist:
|
||||
for p in param.flattened_parameters:
|
||||
rav_i = set(self._raveled_index_for(p))
|
||||
for constraint in self.constraints.iter_properties():
|
||||
rav_i -= set(self._constraint_indices(p, constraint))
|
||||
rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
|
||||
ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
|
||||
if ind.size != 0:
|
||||
p[np.unravel_index(ind, p.shape)].constrain_positive()
|
||||
|
||||
# for s in positive_strings:
|
||||
# paramlist = self.grep_param_names(".*"+s)
|
||||
# for param in paramlist:
|
||||
# for p in param.flattened_parameters:
|
||||
# rav_i = set(self._raveled_index_for(p))
|
||||
# for constraint in self.constraints.iter_properties():
|
||||
# rav_i -= set(self._constraint_indices(p, constraint))
|
||||
# rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
|
||||
# ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
|
||||
# if ind.size != 0:
|
||||
# p[np.unravel_index(ind, p.shape)].constrain_positive(warning=warning)
|
||||
# if paramlist:
|
||||
# self.__getitem__(None, paramlist).constrain_positive(warning=warning)
|
||||
# currently_constrained = self.all_constrained_indices()
|
||||
|
|
@ -325,7 +325,6 @@ class Model(Parameterized):
|
|||
if self._fail_count >= self._allowed_failures:
|
||||
raise e
|
||||
self._fail_count += 1
|
||||
import ipdb;ipdb.set_trace()
|
||||
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
|
||||
return obj_grads
|
||||
|
||||
|
|
@ -383,7 +382,7 @@ class Model(Parameterized):
|
|||
sgd.run()
|
||||
self.optimization_runs.append(sgd)
|
||||
|
||||
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
|
||||
def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
|
||||
"""
|
||||
Check the gradient of the ,odel by comparing to a numerical
|
||||
estimate. If the verbose flag is passed, invividual
|
||||
|
|
@ -408,18 +407,18 @@ class Model(Parameterized):
|
|||
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
|
||||
|
||||
# evaulate around the point x
|
||||
f1, g1 = self.objective_and_gradients(x + dx)
|
||||
f2, g2 = self.objective_and_gradients(x - dx)
|
||||
f1 = self.objective_function(x + dx)
|
||||
f2 = self.objective_function(x - dx)
|
||||
gradient = self.objective_function_gradients(x)
|
||||
|
||||
numerical_gradient = (f1 - f2) / (2 * dx)
|
||||
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
|
||||
|
||||
|
||||
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
|
||||
else:
|
||||
# check the gradient of each parameter individually, and do some pretty printing
|
||||
try:
|
||||
names = self._get_param_names_transformed()
|
||||
names = self._get_param_names()
|
||||
except NotImplementedError:
|
||||
names = ['Variable %i' % i for i in range(len(x))]
|
||||
# Prepare for pretty-printing
|
||||
|
|
@ -433,39 +432,46 @@ class Model(Parameterized):
|
|||
header_string = map(lambda x: '|'.join(x), [header_string])
|
||||
separator = '-' * len(header_string[0])
|
||||
print '\n'.join([header_string[0], separator])
|
||||
|
||||
if target_param is None:
|
||||
param_list = range(len(x))
|
||||
else:
|
||||
param_list = self.grep_param_names(target_param, transformed=True, search=True)
|
||||
if not np.any(param_list):
|
||||
param_list = self._raveled_index_for(target_param)
|
||||
if self._has_fixes():
|
||||
param_list = np.intersect1d(param_list, np.r_[:self.size][self._fixes_], True)
|
||||
|
||||
if param_list.size == 0:
|
||||
print "No free parameters to check"
|
||||
return
|
||||
|
||||
|
||||
for i in param_list:
|
||||
gradient = self.objective_function_gradients(x)
|
||||
np.where(gradient==0, 1e-312, gradient)
|
||||
ret = True
|
||||
for i, ind in enumerate(param_list):
|
||||
xx = x.copy()
|
||||
xx[i] += step
|
||||
f1, g1 = self.objective_and_gradients(xx)
|
||||
xx[i] -= 2.*step
|
||||
f2, g2 = self.objective_and_gradients(xx)
|
||||
gradient = self.objective_function_gradients(x)[i]
|
||||
|
||||
xx[ind] += step
|
||||
f1 = self.objective_function(xx)
|
||||
xx[ind] -= 2.*step
|
||||
f2 = self.objective_function(xx)
|
||||
|
||||
numerical_gradient = (f1 - f2) / (2 * step)
|
||||
ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
|
||||
difference = np.abs((f1 - f2) / 2 / step - gradient)
|
||||
ratio = (f1 - f2) / (2 * step * gradient[ind])
|
||||
difference = np.abs((f1 - f2) / 2 / step - gradient[ind])
|
||||
|
||||
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
|
||||
formatted_name = "\033[92m {0} \033[0m".format(names[i])
|
||||
formatted_name = "\033[92m {0} \033[0m".format(names[ind])
|
||||
ret &= True
|
||||
else:
|
||||
formatted_name = "\033[91m {0} \033[0m".format(names[i])
|
||||
formatted_name = "\033[91m {0} \033[0m".format(names[ind])
|
||||
ret &= False
|
||||
|
||||
r = '%.6f' % float(ratio)
|
||||
d = '%.6f' % float(difference)
|
||||
g = '%.6f' % gradient
|
||||
g = '%.6f' % gradient[ind]
|
||||
ng = '%.6f' % float(numerical_gradient)
|
||||
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
|
||||
print grad_string
|
||||
|
||||
return ret
|
||||
|
||||
def input_sensitivity(self):
|
||||
"""
|
||||
return an array describing the sesitivity of the model to each input
|
||||
|
|
|
|||
|
|
@ -12,11 +12,22 @@ class ListArray(np.ndarray):
|
|||
WARNING: This overrides the functionality of x==y!!!
|
||||
Use numpy.equal(x,y) for element-wise equality testing.
|
||||
"""
|
||||
|
||||
def __new__(cls, input_array):
|
||||
obj = np.asanyarray(input_array).view(cls)
|
||||
return obj
|
||||
def __eq__(self, other):
|
||||
return other is self
|
||||
#def __eq__(self, other):
|
||||
# return other is self
|
||||
|
||||
class ParamList(list):
|
||||
|
||||
def __contains__(self, other):
|
||||
for el in self:
|
||||
if el is other:
|
||||
return True
|
||||
return False
|
||||
|
||||
pass
|
||||
|
||||
class ObservableArray(ListArray, Observable):
|
||||
"""
|
||||
|
|
@ -25,8 +36,9 @@ class ObservableArray(ListArray, Observable):
|
|||
will be called every time this array changes. The callable
|
||||
takes exactly one argument, which is this array itself.
|
||||
"""
|
||||
__array_priority__ = 0 # Never give back Param
|
||||
__array_priority__ = -1 # Never give back ObservableArray
|
||||
def __new__(cls, input_array):
|
||||
cls.__name__ = "ObservableArray\n "
|
||||
obj = super(ObservableArray, cls).__new__(cls, input_array).view(cls)
|
||||
obj._observers_ = {}
|
||||
return obj
|
||||
|
|
@ -34,18 +46,168 @@ class ObservableArray(ListArray, Observable):
|
|||
# see InfoArray.__array_finalize__ for comments
|
||||
if obj is None: return
|
||||
self._observers_ = getattr(obj, '_observers_', None)
|
||||
|
||||
def __setitem__(self, s, val, update=True):
|
||||
if self.ndim:
|
||||
if not np.all(np.equal(self[s], val)):
|
||||
super(ObservableArray, self).__setitem__(s, val)
|
||||
if update:
|
||||
self._notify_observers()
|
||||
else:
|
||||
if not np.all(np.equal(self, val)):
|
||||
super(ObservableArray, self).__setitem__(Ellipsis, val)
|
||||
if update:
|
||||
self._notify_observers()
|
||||
super(ObservableArray, self).__setitem__(s, val)
|
||||
if update:
|
||||
self._notify_observers()
|
||||
def __getslice__(self, start, stop):
|
||||
return self.__getitem__(slice(start, stop))
|
||||
def __setslice__(self, start, stop, val):
|
||||
return self.__setitem__(slice(start, stop), val)
|
||||
return self.__setitem__(slice(start, stop), val)
|
||||
|
||||
def __copy__(self, *args):
|
||||
return ObservableArray(self.base.base.copy(*args))
|
||||
def copy(self, *args):
|
||||
return self.__copy__(*args)
|
||||
|
||||
def __ror__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ror__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ilshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ilshift__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __irshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__irshift__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rrshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rrshift__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ixor__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ixor__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rxor__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rxor__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
|
||||
def __rdivmod__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rdivmod__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __radd__(self, *args, **kwargs):
|
||||
r = np.ndarray.__radd__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rdiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rdiv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rtruediv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rtruediv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ipow__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ipow__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rmul__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rmul__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rpow__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rpow__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rsub__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rsub__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ifloordiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ifloordiv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __isub__(self, *args, **kwargs):
|
||||
r = np.ndarray.__isub__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ior__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ior__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __itruediv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__itruediv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __idiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__idiv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rfloordiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rfloordiv__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __iand__(self, *args, **kwargs):
|
||||
r = np.ndarray.__iand__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __imod__(self, *args, **kwargs):
|
||||
r = np.ndarray.__imod__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __iadd__(self, *args, **kwargs):
|
||||
r = np.ndarray.__iadd__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __imul__(self, *args, **kwargs):
|
||||
r = np.ndarray.__imul__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __rshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__rshift__(self, *args, **kwargs)
|
||||
self._notify_observers()
|
||||
return r
|
||||
|
||||
|
|
|
|||
|
|
@ -77,6 +77,12 @@ class ParameterIndexOperations(object):
|
|||
def iter_properties(self):
|
||||
return self._properties.iterkeys()
|
||||
|
||||
def shift(self, start, size):
|
||||
for ind in self.iterindices():
|
||||
toshift = ind>=start
|
||||
if len(toshift) > 0:
|
||||
ind[toshift] += size
|
||||
|
||||
def clear(self):
|
||||
self._properties.clear()
|
||||
|
||||
|
|
@ -90,11 +96,6 @@ class ParameterIndexOperations(object):
|
|||
return self._properties.values()
|
||||
|
||||
def properties_for(self, index):
|
||||
# already_seen = dict()
|
||||
# for ni in index:
|
||||
# if ni not in already_seen:
|
||||
# already_seen[ni] = [prop for prop in self.iter_properties() if ni in self._properties[prop]]
|
||||
# yield already_seen[ni]
|
||||
return vectorize(lambda i: [prop for prop in self.iter_properties() if i in self._properties[prop]], otypes=[list])(index)
|
||||
|
||||
def add(self, prop, indices):
|
||||
|
|
@ -111,70 +112,11 @@ class ParameterIndexOperations(object):
|
|||
self._properties[prop] = diff
|
||||
else:
|
||||
del self._properties[prop]
|
||||
#[self._reverse[i].remove(prop) for i in removed if prop in self._reverse[i]]
|
||||
return removed.astype(int)
|
||||
# else:
|
||||
# for a in self.properties():
|
||||
# if numpy.all(a==prop) and a._parent_index_ == prop._parent_index_:
|
||||
# ind = create_raveled_indices(indices, shape, offset)
|
||||
# diff = remove_indices(self[a], ind)
|
||||
# removed = numpy.intersect1d(self[a], ind, True)
|
||||
# if not index_empty(diff):
|
||||
# self._properties[a] = diff
|
||||
# else:
|
||||
# del self._properties[a]
|
||||
# [self._reverse[i].remove(a) for i in removed if a in self._reverse[i]]
|
||||
# return removed.astype(int)
|
||||
return numpy.array([]).astype(int)
|
||||
def __getitem__(self, prop):
|
||||
return self._properties[prop]
|
||||
|
||||
# class TieIndexOperations(object):
|
||||
# def __init__(self, params):
|
||||
# self.params = params
|
||||
# self.tied_from = ParameterIndexOperations()
|
||||
# self.tied_to = ParameterIndexOperations()
|
||||
# def add(self, tied_from, tied_to):
|
||||
# rav_from = self.params._raveled_index_for(tied_from)
|
||||
# rav_to = self.params._raveled_index_for(tied_to)
|
||||
# self.tied_from.add(tied_to, rav_from)
|
||||
# self.tied_to.add(tied_to, rav_to)
|
||||
# return rav_from, rav_to
|
||||
# def remove(self, tied_from, tied_to):
|
||||
# rav_from = self.params._raveled_index_for(tied_from)
|
||||
# rav_to = self.params._raveled_index_for(tied_to)
|
||||
# rem_from = self.tied_from.remove(tied_to, rav_from)
|
||||
# rem_to = self.tied_to.remove(tied_to, rav_to)
|
||||
# left_from = self.tied_from._properties.pop(tied_to)
|
||||
# left_to = self.tied_to._properties.pop(tied_to)
|
||||
# self.tied_from[numpy.delete(tied_to, rem_from)] = left_from
|
||||
# self.tied_to[numpy.delete(tied_to, rem_to)] = left_to
|
||||
# return rav_from, rav_to
|
||||
# def from_to_for(self, index):
|
||||
# return self.tied_from.properties_for(index), self.tied_to.properties_for(index)
|
||||
# def iter_from_to_indices(self):
|
||||
# for k, f in self.tied_from.iteritems():
|
||||
# yield f, self.tied_to[k]
|
||||
# def iter_to_indices(self):
|
||||
# return self.tied_to.iterindices()
|
||||
# def iter_from_indices(self):
|
||||
# return self.tied_from.iterindices()
|
||||
# def iter_from_items(self):
|
||||
# for f, i in self.tied_from.iteritems():
|
||||
# yield f, i
|
||||
# def iter_properties(self):
|
||||
# return self.tied_from.iter_properties()
|
||||
# def properties(self):
|
||||
# return self.tied_from.properties()
|
||||
# def from_to_indices(self, param):
|
||||
# return self.tied_from[param], self.tied_to[param]
|
||||
#
|
||||
# # def create_raveled_indices(index, shape, offset=0):
|
||||
# # if isinstance(index, (tuple, list)): i = [slice(None)] + list(index)
|
||||
# # else: i = [slice(None), index]
|
||||
# # ind = numpy.array(numpy.ravel_multi_index(numpy.indices(shape)[i], shape)).flat + numpy.int_(offset)
|
||||
# # return ind
|
||||
|
||||
def combine_indices(arr1, arr2):
|
||||
return numpy.union1d(arr1, arr2)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
import itertools
|
||||
import numpy
|
||||
from parameter_core import Constrainable, adjust_name_for_printing
|
||||
from array_core import ObservableArray
|
||||
from parameter_core import Constrainable, Gradcheckable, adjust_name_for_printing
|
||||
from array_core import ObservableArray, ParamList
|
||||
|
||||
###### printing
|
||||
__constraints_name__ = "Constraint"
|
||||
|
|
@ -12,47 +12,41 @@ __index_name__ = "Index"
|
|||
__tie_name__ = "Tied to"
|
||||
__precision__ = numpy.get_printoptions()['precision'] # numpy printing precision used, sublassing numpy ndarray after all
|
||||
__print_threshold__ = 5
|
||||
######
|
||||
######
|
||||
|
||||
class Float(numpy.float64, Constrainable):
|
||||
def __init__(self, f, base):
|
||||
super(Float,self).__init__(f)
|
||||
self._base = base
|
||||
|
||||
|
||||
class Param(ObservableArray, Constrainable):
|
||||
|
||||
|
||||
class Param(ObservableArray, Constrainable, Gradcheckable):
|
||||
"""
|
||||
Parameter object for GPy models.
|
||||
|
||||
:param name: name of the parameter to be printed
|
||||
:param input_array: array which this parameter handles
|
||||
|
||||
:param str name: name of the parameter to be printed
|
||||
:param input_array: array which this parameter handles
|
||||
:type input_array: numpy.ndarray
|
||||
:param default_constraint: The default constraint for this parameter
|
||||
:type default_constraint:
|
||||
|
||||
You can add/remove constraints by calling constrain on the parameter itself, e.g:
|
||||
|
||||
|
||||
- self[:,1].constrain_positive()
|
||||
- self[0].tie_to(other)
|
||||
- self.untie()
|
||||
- self[:3,:].unconstrain()
|
||||
- self[1].fix()
|
||||
|
||||
|
||||
Fixing parameters will fix them to the value they are right now. If you change
|
||||
the fixed value, it will be fixed to the new value!
|
||||
|
||||
|
||||
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
|
||||
|
||||
This ndarray can be stored in lists and checked if it is in.
|
||||
|
||||
>>> import numpy as np
|
||||
>>> x = np.random.normal(size=(10,3))
|
||||
>>> x in [[1], x, [3]]
|
||||
True
|
||||
|
||||
WARNING: This overrides the functionality of x==y!!!
|
||||
Use numpy.equal(x,y) for element-wise equality testing.
|
||||
"""
|
||||
__array_priority__ = 0 # Never give back Param
|
||||
__array_priority__ = -1 # Never give back Param
|
||||
_fixes_ = None
|
||||
def __new__(cls, name, input_array, *args, **kwargs):
|
||||
def __new__(cls, name, input_array, default_constraint=None):
|
||||
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
|
||||
obj._current_slice_ = (slice(obj.shape[0]),)
|
||||
obj._realshape_ = obj.shape
|
||||
|
|
@ -66,16 +60,16 @@ class Param(ObservableArray, Constrainable):
|
|||
obj.gradient = None
|
||||
return obj
|
||||
|
||||
def __init__(self, name, input_array):
|
||||
super(Param, self).__init__(name=name)
|
||||
|
||||
def __init__(self, name, input_array, default_constraint=None):
|
||||
super(Param, self).__init__(name=name, default_constraint=default_constraint)
|
||||
|
||||
def __array_finalize__(self, obj):
|
||||
# see InfoArray.__array_finalize__ for comments
|
||||
if obj is None: return
|
||||
super(Param, self).__array_finalize__(obj)
|
||||
self._direct_parent_ = getattr(obj, '_direct_parent_', None)
|
||||
self._parent_index_ = getattr(obj, '_parent_index_', None)
|
||||
self._highest_parent_ = getattr(obj, '_highest_parent_', None)
|
||||
self._default_constraint_ = getattr(obj, '_default_constraint_', None)
|
||||
self._current_slice_ = getattr(obj, '_current_slice_', None)
|
||||
self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
|
||||
self._tied_to_ = getattr(obj, '_tied_to_', None)
|
||||
|
|
@ -86,7 +80,7 @@ class Param(ObservableArray, Constrainable):
|
|||
self._original_ = getattr(obj, '_original_', None)
|
||||
self._name = getattr(obj, 'name', None)
|
||||
self.gradient = getattr(obj, 'gradient', None)
|
||||
|
||||
|
||||
def __array_wrap__(self, out_arr, context=None):
|
||||
return out_arr.view(numpy.ndarray)
|
||||
#===========================================================================
|
||||
|
|
@ -94,11 +88,11 @@ class Param(ObservableArray, Constrainable):
|
|||
#===========================================================================
|
||||
def __reduce_ex__(self):
|
||||
func, args, state = super(Param, self).__reduce__()
|
||||
return func, args, (state,
|
||||
return func, args, (state,
|
||||
(self.name,
|
||||
self._direct_parent_,
|
||||
self._parent_index_,
|
||||
self._highest_parent_,
|
||||
self._default_constraint_,
|
||||
self._current_slice_,
|
||||
self._realshape_,
|
||||
self._realsize_,
|
||||
|
|
@ -119,7 +113,7 @@ class Param(ObservableArray, Constrainable):
|
|||
self._realsize_ = state.pop()
|
||||
self._realshape_ = state.pop()
|
||||
self._current_slice_ = state.pop()
|
||||
self._highest_parent_ = state.pop()
|
||||
self._default_constraint_ = state.pop()
|
||||
self._parent_index_ = state.pop()
|
||||
self._direct_parent_ = state.pop()
|
||||
self.name = state.pop()
|
||||
|
|
@ -132,13 +126,13 @@ class Param(ObservableArray, Constrainable):
|
|||
self.flat = param
|
||||
self._notify_tied_parameters()
|
||||
self._notify_observers()
|
||||
|
||||
|
||||
def _get_params(self):
|
||||
return self.flat
|
||||
# @property
|
||||
# def name(self):
|
||||
# """
|
||||
# Name of this parameter.
|
||||
# Name of this parameter.
|
||||
# This can be a callable without parameters. The callable will be called
|
||||
# every time the name property is accessed.
|
||||
# """
|
||||
|
|
@ -156,23 +150,6 @@ class Param(ObservableArray, Constrainable):
|
|||
def _collect_gradient(self, target):
|
||||
target[:] = self.gradient.flat
|
||||
#===========================================================================
|
||||
# Fixing Parameters:
|
||||
#===========================================================================
|
||||
def constrain_fixed(self, warning=True):
|
||||
"""
|
||||
Constrain this paramter to be fixed to the current value it carries.
|
||||
|
||||
:param warning: print a warning for overwriting constraints.
|
||||
"""
|
||||
self._highest_parent_._fix(self,warning)
|
||||
fix = constrain_fixed
|
||||
def unconstrain_fixed(self):
|
||||
"""
|
||||
This parameter will no longer be fixed.
|
||||
"""
|
||||
self._highest_parent_._unfix(self)
|
||||
unfix = unconstrain_fixed
|
||||
#===========================================================================
|
||||
# Tying operations -> bugged, TODO
|
||||
#===========================================================================
|
||||
def tie_to(self, param):
|
||||
|
|
@ -188,22 +165,22 @@ class Param(ObservableArray, Constrainable):
|
|||
Note: For now only one parameter can have ties, so all of a parameter
|
||||
will be removed, when re-tieing!
|
||||
"""
|
||||
#Note: this method will tie to the parameter which is the last in
|
||||
#Note: this method will tie to the parameter which is the last in
|
||||
# the chain of ties. Thus, if you tie to a tied parameter,
|
||||
# this tie will be created to the parameter the param is tied
|
||||
# to.
|
||||
|
||||
assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param,param.__class__)
|
||||
assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param, param.__class__)
|
||||
param = numpy.atleast_1d(param)
|
||||
if param.size != 1:
|
||||
raise NotImplementedError, "Broadcast tying is not implemented yet"
|
||||
try:
|
||||
if self._original_:
|
||||
if self._original_:
|
||||
self[:] = param
|
||||
else: # this happens when indexing created a copy of the array
|
||||
self._direct_parent_._get_original(self)[self._current_slice_] = param
|
||||
except ValueError:
|
||||
raise ValueError("Trying to tie {} with shape {} to {} with shape {}".format(self.name, self.shape, param.name, param.shape))
|
||||
raise ValueError("Trying to tie {} with shape {} to {} with shape {}".format(self.name, self.shape, param.name, param.shape))
|
||||
if param is self:
|
||||
raise RuntimeError, 'Cyclic tieing is not allowed'
|
||||
# if len(param._tied_to_) > 0:
|
||||
|
|
@ -246,7 +223,7 @@ class Param(ObservableArray, Constrainable):
|
|||
t_rav_i = t._raveled_index()
|
||||
tr_rav_i = tied_to_me._raveled_index()
|
||||
new_index = list(set(t_rav_i) | set(tr_rav_i))
|
||||
tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)]
|
||||
tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
|
||||
self._tied_to_me_[tmp] = self._tied_to_me_[t] | set(self._raveled_index())
|
||||
del self._tied_to_me_[t]
|
||||
return
|
||||
|
|
@ -259,7 +236,7 @@ class Param(ObservableArray, Constrainable):
|
|||
import ipdb;ipdb.set_trace()
|
||||
new_index = list(set(t_rav_i) - set(tr_rav_i))
|
||||
if new_index:
|
||||
tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)]
|
||||
tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
|
||||
self._tied_to_me_[tmp] = self._tied_to_me_[t]
|
||||
del self._tied_to_me_[t]
|
||||
if len(self._tied_to_me_[tmp]) == 0:
|
||||
|
|
@ -267,12 +244,12 @@ class Param(ObservableArray, Constrainable):
|
|||
else:
|
||||
del self._tied_to_me_[t]
|
||||
def _on_tied_parameter_changed(self, val, ind):
|
||||
if not self._updated_: #not fast_array_equal(self, val[ind]):
|
||||
if not self._updated_: # not fast_array_equal(self, val[ind]):
|
||||
val = numpy.atleast_1d(val)
|
||||
self._updated_ = True
|
||||
if self._original_:
|
||||
self.__setitem__(slice(None), val[ind], update=False)
|
||||
else: # this happens when indexing created a copy of the array
|
||||
else: # this happens when indexing created a copy of the array
|
||||
self._direct_parent_._get_original(self).__setitem__(self._current_slice_, val[ind], update=False)
|
||||
self._notify_tied_parameters()
|
||||
self._updated_ = False
|
||||
|
|
@ -291,7 +268,7 @@ class Param(ObservableArray, Constrainable):
|
|||
def unset_prior(self, *priors):
|
||||
"""
|
||||
:param priors: priors to remove from this parameter
|
||||
|
||||
|
||||
Remove all priors from this parameter
|
||||
"""
|
||||
self._highest_parent_._remove_prior(self, *priors)
|
||||
|
|
@ -301,11 +278,11 @@ class Param(ObservableArray, Constrainable):
|
|||
def __getitem__(self, s, *args, **kwargs):
|
||||
if not isinstance(s, tuple):
|
||||
s = (s,)
|
||||
if not reduce(lambda a,b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
|
||||
if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
|
||||
s += (Ellipsis,)
|
||||
new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
|
||||
try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
|
||||
except AttributeError: pass# returning 0d array or float, double etc
|
||||
except AttributeError: pass # returning 0d array or float, double etc
|
||||
return new_arr
|
||||
def __setitem__(self, s, val, update=True):
|
||||
super(Param, self).__setitem__(s, val, update=update)
|
||||
|
|
@ -322,12 +299,12 @@ class Param(ObservableArray, Constrainable):
|
|||
if numpy.all(si == Ellipsis):
|
||||
continue
|
||||
if isinstance(si, slice):
|
||||
a = si.indices(self._realshape_[i])[0]
|
||||
a = si.indices(self._realshape_[i])[0]
|
||||
elif isinstance(si, (list,numpy.ndarray,tuple)):
|
||||
a = si[0]
|
||||
else: a = si
|
||||
if a<0:
|
||||
a = self._realshape_[i]+a
|
||||
if a < 0:
|
||||
a = self._realshape_[i] + a
|
||||
internal_offset += a * extended_realshape[i]
|
||||
return internal_offset
|
||||
def _raveled_index(self, slice_index=None):
|
||||
|
|
@ -335,8 +312,8 @@ class Param(ObservableArray, Constrainable):
|
|||
# of this object
|
||||
extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
|
||||
ind = self._indices(slice_index)
|
||||
if ind.ndim < 2: ind=ind[:,None]
|
||||
return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape*x), 1, ind), dtype=int)
|
||||
if ind.ndim < 2: ind = ind[:, None]
|
||||
return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
|
||||
def _expand_index(self, slice_index=None):
|
||||
# this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
|
||||
# it basically translates slices to their respective index arrays and turns negative indices around
|
||||
|
|
@ -349,11 +326,11 @@ class Param(ObservableArray, Constrainable):
|
|||
if isinstance(a, slice):
|
||||
start, stop, step = a.indices(b)
|
||||
return numpy.r_[start:stop:step]
|
||||
elif isinstance(a, (list,numpy.ndarray,tuple)):
|
||||
elif isinstance(a, (list, numpy.ndarray, tuple)):
|
||||
a = numpy.asarray(a, dtype=int)
|
||||
a[a<0] = b + a[a<0]
|
||||
elif a<0:
|
||||
a = b+a
|
||||
a[a < 0] = b + a[a < 0]
|
||||
elif a < 0:
|
||||
a = b + a
|
||||
return numpy.r_[a]
|
||||
return numpy.r_[:b]
|
||||
return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))
|
||||
|
|
@ -377,7 +354,7 @@ class Param(ObservableArray, Constrainable):
|
|||
#===========================================================================
|
||||
@property
|
||||
def _description_str(self):
|
||||
if self.size <= 1: return ["%f"%self]
|
||||
if self.size <= 1: return ["%f" % self]
|
||||
else: return [str(self.shape)]
|
||||
def _parameter_names(self, add_name):
|
||||
return [self.name]
|
||||
|
|
@ -389,31 +366,31 @@ class Param(ObservableArray, Constrainable):
|
|||
return [self.shape]
|
||||
@property
|
||||
def _constraints_str(self):
|
||||
return [' '.join(map(lambda c: str(c[0]) if c[1].size==self._realsize_ else "{"+str(c[0])+"}", self._highest_parent_._constraints_iter_items(self)))]
|
||||
return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self._highest_parent_._constraints_iter_items(self)))]
|
||||
@property
|
||||
def _ties_str(self):
|
||||
return [t._short() for t in self._tied_to_] or ['']
|
||||
@property
|
||||
def name_hirarchical(self):
|
||||
if self.has_parent():
|
||||
return self._direct_parent_.hirarchy_name()+adjust_name_for_printing(self.name)
|
||||
return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
|
||||
return adjust_name_for_printing(self.name)
|
||||
def __repr__(self, *args, **kwargs):
|
||||
name = "\033[1m{x:s}\033[0;0m:\n".format(
|
||||
x=self.name_hirarchical)
|
||||
return name + super(Param, self).__repr__(*args,**kwargs)
|
||||
return name + super(Param, self).__repr__(*args, **kwargs)
|
||||
def _ties_for(self, rav_index):
|
||||
#size = sum(p.size for p in self._tied_to_)
|
||||
# size = sum(p.size for p in self._tied_to_)
|
||||
ties = numpy.empty(shape=(len(self._tied_to_), numpy.size(rav_index)), dtype=Param)
|
||||
for i, tied_to in enumerate(self._tied_to_):
|
||||
for t, ind in tied_to._tied_to_me_.iteritems():
|
||||
if t._parent_index_ == self._parent_index_:
|
||||
matches = numpy.where(rav_index[:,None] == t._raveled_index()[None, :])
|
||||
matches = numpy.where(rav_index[:, None] == t._raveled_index()[None, :])
|
||||
tt_rav_index = tied_to._raveled_index()
|
||||
ind_rav_matches = numpy.where(tt_rav_index == numpy.array(list(ind)))[0]
|
||||
if len(ind) != 1: ties[i, matches[0][ind_rav_matches]] = numpy.take(tt_rav_index, matches[1], mode='wrap')[ind_rav_matches]
|
||||
else: ties[i, matches[0]] = numpy.take(tt_rav_index, matches[1], mode='wrap')
|
||||
return map(lambda a: sum(a,[]), zip(*[[[tie.flatten()] if tx!=None else [] for tx in t] for t,tie in zip(ties,self._tied_to_)]))
|
||||
return map(lambda a: sum(a, []), zip(*[[[tie.flatten()] if tx != None else [] for tx in t] for t, tie in zip(ties, self._tied_to_)]))
|
||||
def _constraints_for(self, rav_index):
|
||||
return self._highest_parent_._constraints_for(self, rav_index)
|
||||
def _indices(self, slice_index=None):
|
||||
|
|
@ -422,13 +399,13 @@ class Param(ObservableArray, Constrainable):
|
|||
slice_index = self._current_slice_
|
||||
if isinstance(slice_index, (tuple, list)):
|
||||
clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
|
||||
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
|
||||
and len(set(map(len,clean_curr_slice))) <= 1):
|
||||
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
|
||||
and len(set(map(len, clean_curr_slice))) <= 1):
|
||||
return numpy.fromiter(itertools.izip(*clean_curr_slice),
|
||||
dtype=[('',int)]*self._realndim_,count=len(clean_curr_slice[0])).view((int, self._realndim_))
|
||||
dtype=[('', int)] * self._realndim_, count=len(clean_curr_slice[0])).view((int, self._realndim_))
|
||||
expanded_index = list(self._expand_index(slice_index))
|
||||
return numpy.fromiter(itertools.product(*expanded_index),
|
||||
dtype=[('',int)]*self._realndim_,count=reduce(lambda a,b: a*b.size,expanded_index,1)).view((int, self._realndim_))
|
||||
dtype=[('', int)] * self._realndim_, count=reduce(lambda a, b: a * b.size, expanded_index, 1)).view((int, self._realndim_))
|
||||
def _max_len_names(self, gen, header):
|
||||
return reduce(lambda a, b:max(a, len(b)), gen, len(header))
|
||||
def _max_len_values(self):
|
||||
|
|
@ -441,9 +418,9 @@ class Param(ObservableArray, Constrainable):
|
|||
if self._realsize_ < 2:
|
||||
return name
|
||||
ind = self._indices()
|
||||
if ind.size > 4: indstr = ','.join(map(str,ind[:2])) + "..." + ','.join(map(str,ind[-2:]))
|
||||
else: indstr = ','.join(map(str,ind))
|
||||
return name+'['+indstr+']'
|
||||
if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
|
||||
else: indstr = ','.join(map(str, ind))
|
||||
return name + '[' + indstr + ']'
|
||||
def __str__(self, constr_matrix=None, indices=None, ties=None, lc=None, lx=None, li=None, lt=None):
|
||||
filter_ = self._current_slice_
|
||||
vals = self.flat
|
||||
|
|
@ -456,10 +433,10 @@ class Param(ObservableArray, Constrainable):
|
|||
if lx is None: lx = self._max_len_values()
|
||||
if li is None: li = self._max_len_index(indices)
|
||||
if lt is None: lt = self._max_len_names(ties, __tie_name__)
|
||||
header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc,lx,li,lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing
|
||||
header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc, lx, li, lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing
|
||||
if not ties: ties = itertools.cycle([''])
|
||||
return "\n".join([header]+[" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc,lx,__precision__,li,lt, x=x, c=" ".join(map(str,c)), t=(t or ''), i=i) for i,x,c,t in itertools.izip(indices,vals,constr_matrix,ties)]) # return all the constraints with right indices
|
||||
#except: return super(Param, self).__str__()
|
||||
return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, x=x, c=" ".join(map(str, c)), t=(t or ''), i=i) for i, x, c, t in itertools.izip(indices, vals, constr_matrix, ties)]) # return all the constraints with right indices
|
||||
# except: return super(Param, self).__str__()
|
||||
|
||||
class ParamConcatenation(object):
|
||||
def __init__(self, params):
|
||||
|
|
@ -470,12 +447,12 @@ class ParamConcatenation(object):
|
|||
|
||||
See :py:class:`GPy.core.parameter.Param` for more details on constraining.
|
||||
"""
|
||||
#self.params = params
|
||||
self.params = []
|
||||
# self.params = params
|
||||
self.params = ParamList([])
|
||||
for p in params:
|
||||
for p in p.flattened_parameters:
|
||||
if p not in self.params:
|
||||
self.params.append(p)
|
||||
self.params.append(p)
|
||||
self._param_sizes = [p.size for p in self.params]
|
||||
startstops = numpy.cumsum([0] + self._param_sizes)
|
||||
self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])]
|
||||
|
|
@ -483,15 +460,15 @@ class ParamConcatenation(object):
|
|||
# Get/set items, enable broadcasting
|
||||
#===========================================================================
|
||||
def __getitem__(self, s):
|
||||
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
|
||||
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
|
||||
params = [p._get_params()[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p._get_params()[ind[ps]])]
|
||||
if len(params)==1: return params[0]
|
||||
return ParamConcatenation(params)
|
||||
def __setitem__(self, s, val, update=True):
|
||||
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
|
||||
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
|
||||
vals = self._vals(); vals[s] = val; del val
|
||||
[numpy.place(p, ind[ps], vals[ps]) and p._notify_tied_parameters()
|
||||
for p, ps in zip(self.params, self._param_slices_)]
|
||||
[numpy.place(p, ind[ps], vals[ps]) and p._notify_tied_parameters()
|
||||
for p, ps in zip(self.params, self._param_slices_)]
|
||||
if update:
|
||||
self.params[0]._highest_parent_.parameters_changed()
|
||||
def _vals(self):
|
||||
|
|
@ -499,46 +476,68 @@ class ParamConcatenation(object):
|
|||
#===========================================================================
|
||||
# parameter operations:
|
||||
#===========================================================================
|
||||
def update_all_params(self):
|
||||
self.params[0]._highest_parent_.parameters_changed()
|
||||
|
||||
def constrain(self, constraint, warning=True):
|
||||
[param.constrain(constraint) for param in self.params]
|
||||
[param.constrain(constraint, update=False) for param in self.params]
|
||||
self.update_all_params()
|
||||
constrain.__doc__ = Param.constrain.__doc__
|
||||
|
||||
def constrain_positive(self, warning=True):
|
||||
[param.constrain_positive(warning) for param in self.params]
|
||||
[param.constrain_positive(warning, update=False) for param in self.params]
|
||||
self.update_all_params()
|
||||
constrain_positive.__doc__ = Param.constrain_positive.__doc__
|
||||
|
||||
def constrain_fixed(self, warning=True):
|
||||
[param.constrain_fixed(warning) for param in self.params]
|
||||
constrain_fixed.__doc__ = Param.constrain_fixed.__doc__
|
||||
fix = constrain_fixed
|
||||
|
||||
def constrain_negative(self, warning=True):
|
||||
[param.constrain_negative(warning) for param in self.params]
|
||||
[param.constrain_negative(warning, update=False) for param in self.params]
|
||||
self.update_all_params()
|
||||
constrain_negative.__doc__ = Param.constrain_negative.__doc__
|
||||
|
||||
def constrain_bounded(self, lower, upper, warning=True):
|
||||
[param.constrain_bounded(lower, upper, warning) for param in self.params]
|
||||
[param.constrain_bounded(lower, upper, warning, update=False) for param in self.params]
|
||||
self.update_all_params()
|
||||
constrain_bounded.__doc__ = Param.constrain_bounded.__doc__
|
||||
|
||||
def unconstrain(self, *constraints):
|
||||
[param.unconstrain(*constraints) for param in self.params]
|
||||
unconstrain.__doc__ = Param.unconstrain.__doc__
|
||||
|
||||
def unconstrain_negative(self):
|
||||
[param.unconstrain_negative() for param in self.params]
|
||||
unconstrain_negative.__doc__ = Param.unconstrain_negative.__doc__
|
||||
|
||||
def unconstrain_positive(self):
|
||||
[param.unconstrain_positive() for param in self.params]
|
||||
unconstrain_positive.__doc__ = Param.unconstrain_positive.__doc__
|
||||
|
||||
def unconstrain_fixed(self):
|
||||
[param.unconstrain_fixed() for param in self.params]
|
||||
unconstrain_fixed.__doc__ = Param.unconstrain_fixed.__doc__
|
||||
unfix = unconstrain_fixed
|
||||
|
||||
def unconstrain_bounded(self, lower, upper):
|
||||
[param.unconstrain_bounded(lower, upper) for param in self.params]
|
||||
unconstrain_bounded.__doc__ = Param.unconstrain_bounded.__doc__
|
||||
|
||||
def untie(self, *ties):
|
||||
[param.untie(*ties) for param in self.params]
|
||||
__lt__ = lambda self, val: self._vals()<val
|
||||
__le__ = lambda self, val: self._vals()<=val
|
||||
__eq__ = lambda self, val: self._vals()==val
|
||||
__ne__ = lambda self, val: self._vals()!=val
|
||||
__gt__ = lambda self, val: self._vals()>val
|
||||
__ge__ = lambda self, val: self._vals()>=val
|
||||
|
||||
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
|
||||
return self.params[0]._highest_parent_._checkgrad(self, verbose, step, tolerance)
|
||||
#checkgrad.__doc__ = Gradcheckable.checkgrad.__doc__
|
||||
|
||||
__lt__ = lambda self, val: self._vals() < val
|
||||
__le__ = lambda self, val: self._vals() <= val
|
||||
__eq__ = lambda self, val: self._vals() == val
|
||||
__ne__ = lambda self, val: self._vals() != val
|
||||
__gt__ = lambda self, val: self._vals() > val
|
||||
__ge__ = lambda self, val: self._vals() >= val
|
||||
def __str__(self, *args, **kwargs):
|
||||
def f(p):
|
||||
ind = p._raveled_index()
|
||||
|
|
@ -555,9 +554,9 @@ class ParamConcatenation(object):
|
|||
return "\n{}\n".format(" -"+"- | -".join(['-'*l for l in [li,lx,lc,lt]])).join(strings)
|
||||
def __repr__(self):
|
||||
return "\n".join(map(repr,self.params))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
|
||||
from GPy.core.parameterized import Parameterized
|
||||
from GPy.core.parameter import Param
|
||||
|
|
@ -567,17 +566,17 @@ if __name__ == '__main__':
|
|||
print "random done"
|
||||
p = Param("q_mean", X)
|
||||
p1 = Param("q_variance", numpy.random.rand(*p.shape))
|
||||
p2 = Param("Y", numpy.random.randn(p.shape[0],1))
|
||||
|
||||
p2 = Param("Y", numpy.random.randn(p.shape[0], 1))
|
||||
|
||||
p3 = Param("variance", numpy.random.rand())
|
||||
p4 = Param("lengthscale", numpy.random.rand(2))
|
||||
|
||||
|
||||
m = Parameterized()
|
||||
rbf = Parameterized(name='rbf')
|
||||
|
||||
|
||||
rbf.add_parameter(p3,p4)
|
||||
m.add_parameter(p,p1,rbf)
|
||||
|
||||
|
||||
print "setting params"
|
||||
#print m.q_v[3:5,[1,4,5]]
|
||||
print "constraining variance"
|
||||
|
|
|
|||
|
|
@ -14,12 +14,13 @@ class Observable(object):
|
|||
_observers_ = {}
|
||||
def add_observer(self, observer, callble):
|
||||
self._observers_[observer] = callble
|
||||
callble(self)
|
||||
#callble(self)
|
||||
def remove_observer(self, observer):
|
||||
del self._observers_[observer]
|
||||
def _notify_observers(self):
|
||||
[callble(self) for callble in self._observers_.itervalues()]
|
||||
|
||||
|
||||
|
||||
class Pickleable(object):
|
||||
def _getstate(self):
|
||||
"""
|
||||
|
|
@ -35,9 +36,9 @@ class Pickleable(object):
|
|||
Set the state (memento pattern) of this class to the given state.
|
||||
Usually this is just the counterpart to _getstate, such that
|
||||
an object is a copy of another when calling
|
||||
|
||||
|
||||
copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())
|
||||
|
||||
|
||||
See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
|
||||
"""
|
||||
raise NotImplementedError, "To be able to use pickling you need to implement this method"
|
||||
|
|
@ -47,19 +48,24 @@ class Pickleable(object):
|
|||
#===============================================================================
|
||||
|
||||
class Parentable(object):
|
||||
def __init__(self, direct_parent=None, highest_parent=None, parent_index=None):
|
||||
super(Parentable,self).__init__()
|
||||
def __init__(self, direct_parent=None, parent_index=None):
|
||||
super(Parentable,self).__init__()
|
||||
self._direct_parent_ = direct_parent
|
||||
self._parent_index_ = parent_index
|
||||
self._highest_parent_ = highest_parent
|
||||
|
||||
|
||||
def has_parent(self):
|
||||
return self._direct_parent_ is not None and self._highest_parent_ is not None
|
||||
|
||||
return self._direct_parent_ is not None
|
||||
|
||||
@property
|
||||
def _highest_parent_(self):
|
||||
if self._direct_parent_ is None:
|
||||
return self
|
||||
return self._direct_parent_._highest_parent_
|
||||
|
||||
class Nameable(Parentable):
|
||||
_name = None
|
||||
def __init__(self, name, direct_parent=None, highest_parent=None, parent_index=None):
|
||||
super(Nameable,self).__init__(direct_parent, highest_parent, parent_index)
|
||||
def __init__(self, name, direct_parent=None, parent_index=None):
|
||||
super(Nameable,self).__init__(direct_parent, parent_index)
|
||||
self._name = name or self.__class__.__name__
|
||||
|
||||
@property
|
||||
|
|
@ -68,13 +74,45 @@ class Nameable(Parentable):
|
|||
@name.setter
|
||||
def name(self, name):
|
||||
from_name = self.name
|
||||
self._name = name
|
||||
self._name = name
|
||||
if self.has_parent():
|
||||
self._direct_parent_._name_changed(self, from_name)
|
||||
|
||||
self._direct_parent_._name_changed(self, from_name)
|
||||
|
||||
class Gradcheckable(Parentable):
|
||||
#===========================================================================
|
||||
# Gradchecking
|
||||
#===========================================================================
|
||||
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
|
||||
if self.has_parent():
|
||||
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
|
||||
return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
|
||||
def _checkgrad(self, param):
|
||||
raise NotImplementedError, "Need log likelihood to check gradient against"
|
||||
|
||||
|
||||
class Constrainable(Nameable):
|
||||
def __init__(self, name):
|
||||
def __init__(self, name, default_constraint=None):
|
||||
super(Constrainable,self).__init__(name)
|
||||
self._default_constraint_ = default_constraint
|
||||
#===========================================================================
|
||||
# Fixing Parameters:
|
||||
#===========================================================================
|
||||
def constrain_fixed(self, value=None, warning=True):
|
||||
"""
|
||||
Constrain this paramter to be fixed to the current value it carries.
|
||||
|
||||
:param warning: print a warning for overwriting constraints.
|
||||
"""
|
||||
if value is not None:
|
||||
self[:] = value
|
||||
self._highest_parent_._fix(self,warning)
|
||||
fix = constrain_fixed
|
||||
def unconstrain_fixed(self):
|
||||
"""
|
||||
This parameter will no longer be fixed.
|
||||
"""
|
||||
self._highest_parent_._unfix(self)
|
||||
unfix = unconstrain_fixed
|
||||
#===========================================================================
|
||||
# Constrain operations -> done
|
||||
#===========================================================================
|
||||
|
|
@ -83,7 +121,7 @@ class Constrainable(Nameable):
|
|||
:param transform: the :py:class:`GPy.core.transformations.Transformation`
|
||||
to constrain the this parameter to.
|
||||
:param warning: print a warning if re-constraining parameters.
|
||||
|
||||
|
||||
Constrain the parameter to the given
|
||||
:py:class:`GPy.core.transformations.Transformation`.
|
||||
"""
|
||||
|
|
@ -96,37 +134,37 @@ class Constrainable(Nameable):
|
|||
self._add_constrain(p, transform, warning)
|
||||
if update:
|
||||
self.parameters_changed()
|
||||
|
||||
def constrain_positive(self, warning=True):
|
||||
|
||||
def constrain_positive(self, warning=True, update=True):
|
||||
"""
|
||||
:param warning: print a warning if re-constraining parameters.
|
||||
|
||||
|
||||
Constrain this parameter to the default positive constraint.
|
||||
"""
|
||||
self.constrain(Logexp(), warning)
|
||||
self.constrain(Logexp(), warning=warning, update=update)
|
||||
|
||||
def constrain_negative(self, warning=True):
|
||||
def constrain_negative(self, warning=True, update=True):
|
||||
"""
|
||||
:param warning: print a warning if re-constraining parameters.
|
||||
|
||||
|
||||
Constrain this parameter to the default negative constraint.
|
||||
"""
|
||||
self.constrain(NegativeLogexp(), warning)
|
||||
self.constrain(NegativeLogexp(), warning=warning, update=update)
|
||||
|
||||
def constrain_bounded(self, lower, upper, warning=True):
|
||||
def constrain_bounded(self, lower, upper, warning=True, update=True):
|
||||
"""
|
||||
:param lower, upper: the limits to bound this parameter to
|
||||
:param warning: print a warning if re-constraining parameters.
|
||||
|
||||
|
||||
Constrain this parameter to lie within the given range.
|
||||
"""
|
||||
self.constrain(Logistic(lower, upper), warning)
|
||||
self.constrain(Logistic(lower, upper), warning=warning, update=update)
|
||||
|
||||
def unconstrain(self, *transforms):
|
||||
"""
|
||||
:param transforms: The transformations to unconstrain from.
|
||||
|
||||
remove all :py:class:`GPy.core.transformations.Transformation`
|
||||
|
||||
remove all :py:class:`GPy.core.transformations.Transformation`
|
||||
transformats of this parameter object.
|
||||
"""
|
||||
if self.has_parent():
|
||||
|
|
@ -137,20 +175,20 @@ class Constrainable(Nameable):
|
|||
|
||||
def unconstrain_positive(self):
|
||||
"""
|
||||
Remove positive constraint of this parameter.
|
||||
Remove positive constraint of this parameter.
|
||||
"""
|
||||
self.unconstrain(Logexp())
|
||||
|
||||
def unconstrain_negative(self):
|
||||
"""
|
||||
Remove negative constraint of this parameter.
|
||||
Remove negative constraint of this parameter.
|
||||
"""
|
||||
self.unconstrain(NegativeLogexp())
|
||||
|
||||
def unconstrain_bounded(self, lower, upper):
|
||||
"""
|
||||
:param lower, upper: the limits to unbound this parameter from
|
||||
|
||||
|
||||
Remove (lower, upper) bounded constrain from this parameter/
|
||||
"""
|
||||
self.unconstrain(Logistic(lower, upper))
|
||||
|
|
|
|||
|
|
@ -8,9 +8,10 @@ import cPickle
|
|||
import itertools
|
||||
from re import compile, _pattern_type
|
||||
from param import ParamConcatenation, Param
|
||||
from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing
|
||||
from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable
|
||||
from index_operations import ParameterIndexOperations,\
|
||||
index_empty
|
||||
from array_core import ParamList
|
||||
|
||||
#===============================================================================
|
||||
# Printing:
|
||||
|
|
@ -23,7 +24,7 @@ FIXED = False
|
|||
UNFIXED = True
|
||||
#===============================================================================
|
||||
|
||||
class Parameterized(Constrainable, Pickleable, Observable):
|
||||
class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
|
||||
"""
|
||||
Parameterized class
|
||||
|
||||
|
|
@ -69,11 +70,11 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
super(Parameterized, self).__init__(name=name)
|
||||
self._in_init_ = True
|
||||
self._constraints_ = None#ParameterIndexOperations()
|
||||
if not hasattr(self, "_parameters_"):
|
||||
self._parameters_ = []
|
||||
self._parameters_ = ParamList()
|
||||
self.size = sum(p.size for p in self._parameters_)
|
||||
if not self._has_fixes():
|
||||
self._fixes_ = None
|
||||
self._param_slices_ = []
|
||||
self._connect_parameters()
|
||||
self._added_names_ = set()
|
||||
del self._in_init_
|
||||
|
|
@ -153,6 +154,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
elif param._has_fixes(): self._fixes_ = np.r_[np.ones(self.size, dtype=bool), fixes_param]
|
||||
|
||||
else:
|
||||
start = sum(p.size for p in self._parameters_[:index])
|
||||
self.constraints.shift(start, param.size)
|
||||
self._parameters_.insert(index, param)
|
||||
|
||||
# make sure fixes and constraints are indexed right
|
||||
|
|
@ -164,12 +167,17 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
self._fixes_ = np.ones(self.size+param.size, dtype=bool)
|
||||
self._fixes_[ins:ins+param.size] = fixes_param
|
||||
self.size += param.size
|
||||
else:
|
||||
raise RuntimeError, """Parameter exists already added and no copy made"""
|
||||
self._connect_parameters()
|
||||
# make sure the constraints are pulled over:
|
||||
if hasattr(param, "_constraints_") and param._constraints_ is not None:
|
||||
for t, ind in param._constraints_.iteritems():
|
||||
|
||||
self.constraints.add(t, ind+self._offset_for(param))
|
||||
param._constraints_.clear()
|
||||
if param._default_constraint_ is not None:
|
||||
self._add_constrain(param, param._default_constraint_, False)
|
||||
if self._has_fixes() and np.all(self._fixes_): # ==UNFIXED
|
||||
self._fixes_= None
|
||||
|
||||
|
|
@ -188,10 +196,10 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
note: if it is a string object it will not (!) be regexp-matched
|
||||
automatically.
|
||||
"""
|
||||
self._parameters_ = [p for p in self._parameters_
|
||||
self._parameters_ = ParamList([p for p in self._parameters_
|
||||
if not (p._parent_index_ in names_params_indices
|
||||
or p.name in names_params_indices
|
||||
or p in names_params_indices)]
|
||||
or p in names_params_indices)])
|
||||
self._connect_parameters()
|
||||
|
||||
def parameters_changed(self):
|
||||
|
|
@ -211,18 +219,11 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
|
||||
# no parameters for this class
|
||||
return
|
||||
i = 0
|
||||
sizes = [0]
|
||||
self._param_slices_ = []
|
||||
for p in self._parameters_:
|
||||
for i,p in enumerate(self._parameters_):
|
||||
p._direct_parent_ = self
|
||||
p._highest_parent_ = self
|
||||
p._parent_index_ = i
|
||||
i += 1
|
||||
for pi in p.flattened_parameters:
|
||||
pi._highest_parent_ = self
|
||||
for pi in p._parameters_:
|
||||
pi._highest_parent_ = self
|
||||
not_unique = []
|
||||
sizes.append(p.size+sizes[-1])
|
||||
self._param_slices_.append(slice(sizes[-2], sizes[-1]))
|
||||
|
|
@ -254,6 +255,16 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
cPickle.dump(self, f, protocol)
|
||||
def copy(self):
|
||||
"""Returns a (deep) copy of the current model """
|
||||
#dc = dict()
|
||||
#for k, v in self.__dict__.iteritems():
|
||||
#if k not in ['_highest_parent_', '_direct_parent_']:
|
||||
#dc[k] = copy.deepcopy(v)
|
||||
|
||||
#dc = copy.deepcopy(self.__dict__)
|
||||
#dc['_highest_parent_'] = None
|
||||
#dc['_direct_parent_'] = None
|
||||
#s = self.__class__.new()
|
||||
#s.__dict__ = dc
|
||||
return copy.deepcopy(self)
|
||||
def __getstate__(self):
|
||||
if self._has_get_set_state():
|
||||
|
|
@ -308,14 +319,20 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
#===========================================================================
|
||||
# Optimization handles:
|
||||
#===========================================================================
|
||||
def _get_param_names_transformed(self):
|
||||
def _get_param_names(self):
|
||||
n = numpy.array([p.name_hirarchical+'['+str(i)+']' for p in self.flattened_parameters for i in p._indices()])
|
||||
return n
|
||||
def _get_param_names_transformed(self):
|
||||
n = self._get_param_names()
|
||||
if self._has_fixes():
|
||||
return n[self._fixes_]
|
||||
return n
|
||||
def _get_params(self):
|
||||
# don't overwrite this anymore!
|
||||
return numpy.hstack([x._get_params() for x in self._parameters_])
|
||||
if not self.size:
|
||||
return np.empty(shape=(0,), dtype=np.float64)
|
||||
return numpy.hstack([x._get_params() for x in self._parameters_ if x.size>0])
|
||||
|
||||
def _set_params(self, params, update=True):
|
||||
# don't overwrite this anymore!
|
||||
[p._set_params(params[s], update=update) for p,s in itertools.izip(self._parameters_,self._param_slices_)]
|
||||
|
|
@ -329,10 +346,12 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
return p
|
||||
def _set_params_transformed(self, p):
|
||||
# inverse apply transformations for parameters and set the resulting parameters
|
||||
self._set_params(self._untransform_params(p))
|
||||
def _untransform_params(self, p):
|
||||
p = p.copy()
|
||||
if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
|
||||
[numpy.put(p, ind, c.f(p[ind])) for c,ind in self.constraints.iteritems() if c != __fixed__]
|
||||
self._set_params(p)
|
||||
return p
|
||||
def _name_changed(self, param, old_name):
|
||||
if hasattr(self, old_name) and old_name in self._added_names_:
|
||||
delattr(self, old_name)
|
||||
|
|
@ -365,6 +384,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
that is an int array, containing the indexes for the flattened
|
||||
param inside this parameterized logic.
|
||||
"""
|
||||
if isinstance(param, ParamConcatenation):
|
||||
return numpy.hstack((self._raveled_index_for(p) for p in param.params))
|
||||
return param._raveled_index() + self._offset_for(param)
|
||||
|
||||
def _raveled_index(self):
|
||||
|
|
@ -374,7 +395,7 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
"""
|
||||
return numpy.r_[:self.size]
|
||||
#===========================================================================
|
||||
# Handle ties:
|
||||
# Fixing parameters:
|
||||
#===========================================================================
|
||||
def _set_fixed(self, param_or_index):
|
||||
if not self._has_fixes(): self._fixes_ = numpy.ones(self.size, dtype=bool)
|
||||
|
|
@ -399,9 +420,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
if self._has_fixes():
|
||||
return self._fixes_[self._raveled_index_for(param)]
|
||||
return numpy.ones(self.size, dtype=bool)[self._raveled_index_for(param)]
|
||||
#===========================================================================
|
||||
# Fixing parameters:
|
||||
#===========================================================================
|
||||
def _fix(self, param, warning=True):
|
||||
f = self._add_constrain(param, __fixed__, warning)
|
||||
self._set_fixed(f)
|
||||
|
|
@ -412,6 +430,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
#===========================================================================
|
||||
# Convenience for fixed, tied checking of param:
|
||||
#===========================================================================
|
||||
def fixed_indices(self):
|
||||
return np.array([x.is_fixed for x in self._parameters_])
|
||||
def _is_fixed(self, param):
|
||||
# returns if the whole param is fixed
|
||||
if not self._has_fixes():
|
||||
|
|
@ -441,7 +461,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
|
|||
# if removing constraints before adding new is not wanted, just delete the above line!
|
||||
self.constraints.add(transform, rav_i)
|
||||
param = self._get_original(param)
|
||||
param._set_params(transform.initialize(param._get_params()))
|
||||
if not (transform == __fixed__):
|
||||
param._set_params(transform.initialize(param._get_params()), update=False)
|
||||
if warning and any(reconstrained):
|
||||
# if you want to print the whole params object, which was reconstrained use:
|
||||
# m = str(param[self._backtranslate_index(param, reconstrained)])
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@
|
|||
|
||||
import numpy as np
|
||||
from domains import _POSITIVE,_NEGATIVE, _BOUNDED
|
||||
import sys
|
||||
import sys
|
||||
import weakref
|
||||
_lim_val = -np.log(sys.float_info.epsilon)
|
||||
_lim_val = -np.log(sys.float_info.epsilon)
|
||||
|
||||
class Transformation(object):
|
||||
domain = None
|
||||
|
|
@ -94,7 +94,7 @@ class LogexpClipped(Logexp):
|
|||
def __str__(self):
|
||||
return '+ve_c'
|
||||
|
||||
|
||||
|
||||
class Exponent(Transformation):
|
||||
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
|
||||
domain = _POSITIVE
|
||||
|
|
@ -162,9 +162,11 @@ class Logistic(Transformation):
|
|||
def initialize(self, f):
|
||||
if np.any(np.logical_or(f < self.lower, f > self.upper)):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
|
||||
#return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
|
||||
#FIXME: Max, zeros_like right?
|
||||
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)
|
||||
def __str__(self):
|
||||
return '{},{}'.format(self.lower, self.upper)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,10 +3,9 @@ Created on 6 Nov 2013
|
|||
|
||||
@author: maxz
|
||||
'''
|
||||
import numpy as np
|
||||
from parameterized import Parameterized
|
||||
from param import Param
|
||||
from ...util.misc import param_to_array
|
||||
from transformations import Logexp
|
||||
|
||||
class Normal(Parameterized):
|
||||
'''
|
||||
|
|
@ -16,9 +15,9 @@ class Normal(Parameterized):
|
|||
'''
|
||||
def __init__(self, means, variances, name='latent space'):
|
||||
Parameterized.__init__(self, name=name)
|
||||
self.means = Param("mean", means)
|
||||
self.variances = Param('variance', variances)
|
||||
self.add_parameters(self.means, self.variances)
|
||||
self.mean = Param("mean", means)
|
||||
self.variance = Param('variance', variances, Logexp())
|
||||
self.add_parameters(self.mean, self.variance)
|
||||
|
||||
def plot(self, *args):
|
||||
"""
|
||||
|
|
@ -26,6 +25,7 @@ class Normal(Parameterized):
|
|||
|
||||
See GPy.plotting.matplot_dep.variational_plots
|
||||
"""
|
||||
import sys
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import variational_plots
|
||||
from ...plotting.matplot_dep import variational_plots
|
||||
return variational_plots.plot(self,*args)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv,
|
|||
from gp import GP
|
||||
from parameterization.param import Param
|
||||
from ..inference.latent_function_inference import varDTC
|
||||
from .. import likelihoods
|
||||
from GPy.util.misc import param_to_array
|
||||
|
||||
class SparseGP(GP):
|
||||
"""
|
||||
|
|
@ -35,46 +37,58 @@ class SparseGP(GP):
|
|||
#pick a sensible inference method
|
||||
if inference_method is None:
|
||||
if isinstance(likelihood, likelihoods.Gaussian):
|
||||
inference_method = varDTC.Gaussian_inference()
|
||||
inference_method = varDTC.VarDTC()
|
||||
else:
|
||||
#inference_method = ??
|
||||
raise NotImplementedError, "what to do what to do?"
|
||||
print "defaulting to ", inference_method, "for latent function inference"
|
||||
|
||||
GP.__init__(self, X, Y, likelihood, inference_method, kernel, name)
|
||||
|
||||
self.Z = Z
|
||||
self.Z = Param('inducing inputs', Z)
|
||||
self.num_inducing = Z.shape[0]
|
||||
|
||||
if X_variance is None:
|
||||
self.has_uncertain_inputs = False
|
||||
self.X_variance = None
|
||||
else:
|
||||
if not (X_variance is None):
|
||||
assert X_variance.shape == X.shape
|
||||
self.has_uncertain_inputs = True
|
||||
self.X_variance = X_variance
|
||||
self.X_variance = X_variance
|
||||
|
||||
self.Z = Param('inducing inputs', self.Z)
|
||||
self.add_parameter(self.Z, gradient=self.dL_dZ, index=0)
|
||||
self.add_parameter(self.kern, gradient=self.dL_dtheta)
|
||||
self.add_parameter(self.likelihood, gradient=lambda:self.likelihood._gradients(partial=self.partial_for_likelihood))
|
||||
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
|
||||
|
||||
self.add_parameter(self.Z, index=0)
|
||||
|
||||
def parameters_changed(self):
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
|
||||
|
||||
#The derivative of the bound wrt the inducing inputs Z
|
||||
self.Z.gradient = self.kern.dK_dX(self.dL_dKmm, self.Z)
|
||||
if self.has_uncertain_inputs:
|
||||
self.Z.gradient += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
|
||||
self.Z.gradient += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
|
||||
#The derivative of the bound wrt the inducing inputs Z
|
||||
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
|
||||
if self.X_variance is None:
|
||||
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
|
||||
else:
|
||||
self.Z.gradient += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
|
||||
self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
|
||||
self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
|
||||
|
||||
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
|
||||
"""
|
||||
Make a prediction for the latent function values
|
||||
"""
|
||||
#TODO!!!
|
||||
if X_variance_new is None:
|
||||
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
|
||||
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
|
||||
if full_cov:
|
||||
Kxx = self.kern.K(Xnew, which_parts=which_parts)
|
||||
var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting
|
||||
else:
|
||||
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
|
||||
var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
|
||||
else:
|
||||
# assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
|
||||
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
|
||||
mu = np.dot(Kx, self.Cpsi1V)
|
||||
if full_cov:
|
||||
raise NotImplementedError, "TODO"
|
||||
else:
|
||||
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
|
||||
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
|
||||
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
|
||||
return mu, var[:,None]
|
||||
|
||||
|
||||
def _getstate(self):
|
||||
|
|
|
|||
|
|
@ -154,13 +154,13 @@ class SVIGP(GP):
|
|||
self.psi2 = None
|
||||
|
||||
def dL_dtheta(self):
|
||||
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
|
||||
dL_dtheta = self.kern._param_grad_helper(self.dL_dKmm, self.Z)
|
||||
if self.has_uncertain_inputs:
|
||||
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
|
||||
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
|
||||
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
|
||||
else:
|
||||
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z)
|
||||
dL_dtheta += self.kern._param_grad_helper(self.dL_dpsi1, self.X_batch, self.Z)
|
||||
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
|
||||
return dL_dtheta
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
import numpy as _np
|
||||
default_seed = _np.random.seed(123344)
|
||||
|
||||
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
|
||||
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=1e4):
|
||||
"""
|
||||
model for testing purposes. Samples from a GP with rbf kernel and learns
|
||||
the samples with a new kernel. Normally not for optimization, just model cheking
|
||||
|
|
@ -15,54 +15,54 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
|
|||
num_inducing = 5
|
||||
if plot:
|
||||
output_dim = 1
|
||||
input_dim = 2
|
||||
input_dim = 3
|
||||
else:
|
||||
input_dim = 2
|
||||
output_dim = 25
|
||||
input_dim = 1
|
||||
output_dim = output_dim
|
||||
|
||||
# generate GPLVM-like data
|
||||
X = _np.random.rand(num_inputs, input_dim)
|
||||
lengthscales = _np.random.rand(input_dim)
|
||||
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
|
||||
+ GPy.kern.white(input_dim, 0.01))
|
||||
#+ GPy.kern.white(input_dim, 0.01)
|
||||
)
|
||||
K = k.K(X)
|
||||
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T
|
||||
lik = Gaussian(Y, normalize=True)
|
||||
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
|
||||
|
||||
k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
|
||||
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||
# k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
|
||||
k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
|
||||
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
|
||||
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
|
||||
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
|
||||
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
|
||||
|
||||
m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
|
||||
m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
|
||||
#===========================================================================
|
||||
# randomly obstruct data with percentage p
|
||||
p = .8
|
||||
Y_obstruct = Y.copy()
|
||||
Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan
|
||||
#===========================================================================
|
||||
m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
|
||||
#m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
|
||||
m.lengthscales = lengthscales
|
||||
|
||||
if plot:
|
||||
import matplotlib.pyplot as pb
|
||||
m.plot()
|
||||
pb.title('PCA initialisation')
|
||||
m2.plot()
|
||||
pb.title('PCA initialisation')
|
||||
#m2.plot()
|
||||
#pb.title('PCA initialisation')
|
||||
|
||||
if optimize:
|
||||
m.optimize('scg', messages=verbose)
|
||||
m2.optimize('scg', messages=verbose)
|
||||
#m2.optimize('scg', messages=verbose)
|
||||
if plot:
|
||||
m.plot()
|
||||
pb.title('After optimisation')
|
||||
m2.plot()
|
||||
pb.title('After optimisation')
|
||||
#m2.plot()
|
||||
#pb.title('After optimisation')
|
||||
|
||||
return m, m2
|
||||
return m
|
||||
|
||||
def gplvm_oil_100(optimize=True, verbose=1, plot=True):
|
||||
import GPy
|
||||
|
|
@ -264,16 +264,15 @@ def bgplvm_simulation(optimize=True, verbose=1,
|
|||
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
|
||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||
Y = Ylist[0]
|
||||
k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
|
||||
m['noise'] = Y.var() / 100.
|
||||
|
||||
|
||||
if optimize:
|
||||
print "Optimizing model:"
|
||||
m.optimize('scg', messages=verbose, max_iters=max_iters,
|
||||
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
|
||||
gtol=.05)
|
||||
if plot:
|
||||
m.plot_X_1d("BGPLVM Latent Space 1D")
|
||||
m.q.plot("BGPLVM Latent Space 1D")
|
||||
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
|
||||
return m
|
||||
|
||||
|
|
|
|||
|
|
@ -37,39 +37,43 @@ def student_t_approx(optimize=True, plot=True):
|
|||
|
||||
# Kernel object
|
||||
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
kernel2 = kernel1.copy()
|
||||
kernel3 = kernel1.copy()
|
||||
kernel4 = kernel1.copy()
|
||||
kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
kernel3 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
kernel4 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
|
||||
#Gaussian GP model on clean data
|
||||
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
|
||||
# optimize
|
||||
m1.ensure_default_constraints()
|
||||
m1.constrain_fixed('white', 1e-5)
|
||||
m1['white'] = 1e-5
|
||||
m1['white'].constrain_fixed('white')
|
||||
m1.randomize()
|
||||
|
||||
#Gaussian GP model on corrupt data
|
||||
m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
|
||||
m2.ensure_default_constraints()
|
||||
m2.constrain_fixed('white', 1e-5)
|
||||
m1['white'] = 1e-5
|
||||
m1['white'].constrain_fixed('white')
|
||||
m2.randomize()
|
||||
|
||||
#Student t GP model on clean data
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
|
||||
m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood)
|
||||
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
|
||||
m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf)
|
||||
m3.ensure_default_constraints()
|
||||
m3.constrain_bounded('t_noise', 1e-6, 10.)
|
||||
m3.constrain_fixed('white', 1e-5)
|
||||
m3['t_noise'].constrain_bounded(1e-6, 10.)
|
||||
m3['white'] = 1e-5
|
||||
m3['white'].constrain_fixed()
|
||||
m3.randomize()
|
||||
|
||||
#Student t GP model on corrupt data
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
|
||||
m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
|
||||
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
|
||||
m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf)
|
||||
m4.ensure_default_constraints()
|
||||
m4.constrain_bounded('t_noise', 1e-6, 10.)
|
||||
m4.constrain_fixed('white', 1e-5)
|
||||
m4['t_noise'].constrain_bounded(1e-6, 10.)
|
||||
m4['white'] = 1e-5
|
||||
m4['white'].constrain_fixed()
|
||||
m4.randomize()
|
||||
|
||||
if optimize:
|
||||
|
|
|
|||
|
|
@ -281,11 +281,12 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
|
|||
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
|
||||
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
|
||||
|
||||
noise_model = GPy.likelihoods.poisson()
|
||||
likelihood = GPy.likelihoods.Laplace(Y,noise_model)
|
||||
kern = GPy.kern.rbf(1)
|
||||
poisson_lik = GPy.likelihoods.Poisson()
|
||||
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
|
||||
|
||||
# create simple GP Model
|
||||
m = GPy.models.GPRegression(X, Y, likelihood=likelihood)
|
||||
m = GPy.core.GP(X, Y, kernel=kern, likelihood=poisson_lik, inference_method=laplace_inf)
|
||||
|
||||
if optimize:
|
||||
m.optimize(optimizer)
|
||||
|
|
@ -472,9 +473,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
|
|||
Z = np.random.uniform(-3., 3., (7, 1))
|
||||
|
||||
k = GPy.kern.rbf(1)
|
||||
|
||||
import ipdb;ipdb.set_trace()
|
||||
# create simple GP Model - no input uncertainty on this one
|
||||
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
|
||||
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
|
||||
|
||||
if optimize:
|
||||
m.optimize('scg', messages=1, max_iters=max_iters)
|
||||
|
|
@ -485,7 +486,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
|
|||
print m
|
||||
|
||||
# the same Model with uncertainty
|
||||
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
|
||||
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S)
|
||||
if optimize:
|
||||
m.optimize('scg', messages=1, max_iters=max_iters)
|
||||
if plot:
|
||||
|
|
|
|||
96
GPy/inference/latent_function_inference/DTC.py
Normal file
96
GPy/inference/latent_function_inference/DTC.py
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
# Copyright (c) 2012, James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from posterior import Posterior
|
||||
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
|
||||
import numpy as np
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class DTC(object):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
|
||||
|
||||
The function self.inference returns a Posterior object, which summarizes
|
||||
the posterior.
|
||||
|
||||
NB. It's not recommended to use this function! It's here for historical purposes.
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
self.const_jitter = 1e-6
|
||||
|
||||
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
|
||||
|
||||
num_inducing, _ = Z.shape
|
||||
num_data, output_dim = Y.shape
|
||||
|
||||
#make sure the noise is not hetero
|
||||
beta = 1./np.squeeze(likelihood.variance)
|
||||
if beta.size <1:
|
||||
raise NotImplementedError, "no hetero noise with this implementatino of DTC"
|
||||
|
||||
Kmm = kern.K(Z)
|
||||
Knn = kern.Kdiag(X)
|
||||
Knm = kern.K(X, Z)
|
||||
U = Knm
|
||||
Uy = np.dot(U.T,Y)
|
||||
|
||||
#factor Kmm
|
||||
Kmmi, L, Li, _ = pdinv(Kmm)
|
||||
|
||||
# Compute A
|
||||
LiUT, _ = dtrtrs(L, U.T*np.sqrt(beta), lower=1)
|
||||
A_I = tdot(LiUT)
|
||||
A = A_I + np.eye(num_inducing)
|
||||
|
||||
# factor A
|
||||
LA = jitchol(A)
|
||||
|
||||
# back substutue to get b, P, v
|
||||
tmp, _ = dtrtrs(L, Uy, lower=1)
|
||||
b, _ = dtrtrs(LA, tmp*beta, lower=1)
|
||||
tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
|
||||
v, _ = dtrtrs(L, tmp, lower=1, trans=1)
|
||||
tmp = tdrtrs(LA, Li, lower=1, trans=0)
|
||||
P = tdot(tmp.T)
|
||||
|
||||
#compute log marginal
|
||||
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
|
||||
-np.sum(np.log(np.diag(LA)))*output_dim + \
|
||||
0.5*num_data*output_dim*np.log(beta) + \
|
||||
-0.5*beta*np.sum(np.square(Y)) +
|
||||
0.5*np.sum(np.square(b))
|
||||
|
||||
# Compute dL_dKmm
|
||||
tmp, _ = dtrtrs(L, A_I, lower=1, trans=1)
|
||||
dL_dK, _ = dtrtrs(L, tmp.T, lower=1, trans=0)
|
||||
tmp, _ = dtrtrs(LA, tmp.T. lower=1, trans=1)
|
||||
dL_dK -= tdot(tmp.T)
|
||||
dL_dK *= output_dim
|
||||
dL_dK -= tdot(v)
|
||||
dL_dK /=2.
|
||||
|
||||
# Compute dL_dU
|
||||
vvT_P = tdot(v.reshape(-1,1)) + P
|
||||
vY = np.dot(v.reshape(-1,1),Y.T)
|
||||
dL_dU = vY + np.dot(vvT_P, U.T)
|
||||
dL_dU *= beta
|
||||
|
||||
#compute dL_dR
|
||||
Uv = np.dot(U, v)
|
||||
dL_dR = 0.5*(np.sum(U*np.dot(P, U.T), 1) - beta * np.sum(np.square(Y, 1)) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1)
|
||||
)*beta**2
|
||||
|
||||
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU}
|
||||
|
||||
#update gradients
|
||||
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
||||
likelihood.update_gradients(dL_dR)
|
||||
|
||||
#construct a posterior object
|
||||
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
|
||||
|
||||
return post, log_marginal, grad_dict
|
||||
|
||||
|
||||
|
|
@ -24,4 +24,5 @@ etc.
|
|||
"""
|
||||
|
||||
from exact_gaussian_inference import ExactGaussianInference
|
||||
from laplace import LaplaceInference
|
||||
expectation_propagation = 'foo' # TODO
|
||||
|
|
|
|||
|
|
@ -53,6 +53,6 @@ class ExactGaussianInference(object):
|
|||
|
||||
likelihood.update_gradients(np.diag(dL_dK))
|
||||
|
||||
return Posterior(LW, alpha, K), log_marginal, {'dL_dK':dL_dK}
|
||||
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,73 +1,54 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ...util.linalg import mdot, jitchol, chol_inv, tdot, dtrtrs
|
||||
from ...core import SparseGP
|
||||
class VarDTC(object):
|
||||
def __init__(self):
|
||||
self.const_jitter = 1e-6
|
||||
|
||||
class FITC(SparseGP):
|
||||
"""
|
||||
def inference(self, kern, X, X_variance, Z, likelihood, Y):
|
||||
|
||||
Sparse FITC approximation
|
||||
num_inducing, _ = Z.shape
|
||||
num_data, output_dim = Y.shape
|
||||
|
||||
:param X: inputs
|
||||
:type X: np.ndarray (num_data x Q)
|
||||
:param likelihood: a likelihood instance, containing the observed data
|
||||
:type likelihood: GPy.likelihood.(Gaussian | EP)
|
||||
:param kernel: the kernel (covariance function). See link kernels
|
||||
:type kernel: a GPy.kern.kern instance
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (M x Q) | None
|
||||
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize_(X|Y): bool
|
||||
#make sure we're not using variational uncertain inputs
|
||||
assert = X_variance is None, "variational inducing inputs only for use with varDTC inference"
|
||||
#Note: we can;t do the variational thing after making the GITC conditional approximation because K~ appears in the log determinant.
|
||||
|
||||
"""
|
||||
#see whether we've got a different noise variance for each datum
|
||||
sigma2 = np.squeeze(likelihood.variance)
|
||||
het_noise = False
|
||||
if sigma2.size <1:
|
||||
het_noise = True
|
||||
|
||||
def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
|
||||
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
|
||||
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
|
||||
|
||||
def update_likelihood_approximation(self, **kwargs):
|
||||
"""
|
||||
Approximates a non-Gaussian likelihood using Expectation Propagation
|
||||
|
||||
For a Gaussian likelihood, no iteration is required:
|
||||
this function does nothing
|
||||
"""
|
||||
self.likelihood.restart()
|
||||
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
|
||||
self._set_params(self._get_params())
|
||||
|
||||
def _compute_kernel_matrices(self):
|
||||
# kernel computations, using BGPLVM notation
|
||||
self.Kmm = self.kern.K(self.Z)
|
||||
self.psi0 = self.kern.Kdiag(self.X)
|
||||
self.psi1 = self.kern.K(self.Z, self.X)
|
||||
self.psi2 = None
|
||||
Kmm = kern.K(Z)
|
||||
Kdiag = kern.Kdiag(X)
|
||||
Kmn = kern.K(X, Z)
|
||||
|
||||
def _computations(self):
|
||||
#factor Kmm
|
||||
self.Lm = jitchol(self.Kmm)
|
||||
self.Lmi,info = dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1)
|
||||
Lmipsi1 = np.dot(self.Lmi,self.psi1)
|
||||
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
|
||||
self.Diag0 = self.psi0 - np.diag(self.Qnn)
|
||||
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #NOTE: beta_star contains Diag0 and the precision
|
||||
self.V_star = self.beta_star * self.likelihood.Y
|
||||
Lm = jitchol(Kmm)
|
||||
V = dtrtrs(Lm, Kmn, lower=1)
|
||||
|
||||
# The rather complex computations of self.A
|
||||
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
|
||||
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
|
||||
self.A = tdot(tmp)
|
||||
#compute effective noise
|
||||
g_sn2 = sigma2 + Kdiag - np.sum(V*V, 0)
|
||||
|
||||
# factor B
|
||||
self.B = np.eye(self.num_inducing) + self.A
|
||||
self.LB = jitchol(self.B)
|
||||
self.LBi = chol_inv(self.LB)
|
||||
self.psi1V = np.dot(self.psi1, self.V_star)
|
||||
#compute and factor B
|
||||
tmp = Kmn / np.sqrt(g_snd)
|
||||
tmp, _ = dtrtrs(Lm, tmp, lower=1)
|
||||
A = tdot(tmp)
|
||||
B = np.eye(num_inducing) + A
|
||||
Bi, Lb, LBi, log_det_B = pdinv(B)
|
||||
|
||||
Lmi_psi1V, info = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
|
||||
self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
|
||||
#compute posterior parameters
|
||||
tmp, _ = dtrtrs()
|
||||
woodbury_vec =
|
||||
|
||||
|
||||
|
||||
|
||||
psi1V = np.dot(self.psi1, self.V_star)
|
||||
Lmi_psi1V, _ = dtrtrs(Lm, self.psi1V, lower=1, trans=0)
|
||||
LBi_Lmi_psi1V, _ = dtrtrs(LB, Lmi_psi1V, lower=1, trans=0)
|
||||
|
||||
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
|
||||
b_psi1_Ki = self.beta_star * Kmmipsi1.T
|
||||
|
|
@ -116,10 +97,10 @@ class FITC(SparseGP):
|
|||
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
|
||||
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
|
||||
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
|
||||
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
|
||||
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
|
||||
self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
|
||||
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
|
||||
self._dpsi1_dtheta += self.kern._param_grad_helper(_dpsi1,self.X[i:i+1,:],self.Z)
|
||||
self._dKmm_dtheta += self.kern._param_grad_helper(_dKmm,self.Z)
|
||||
self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
|
||||
self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])
|
||||
|
||||
# the partial derivative vector for the likelihood
|
||||
if self.likelihood.num_params == 0:
|
||||
|
|
@ -163,15 +144,15 @@ class FITC(SparseGP):
|
|||
|
||||
def dL_dtheta(self):
|
||||
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
|
||||
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
|
||||
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
|
||||
dL_dtheta += self.kern._param_grad_helper(self._dL_dpsi1,self.X,self.Z)
|
||||
dL_dtheta += self.kern._param_grad_helper(self._dL_dKmm,X=self.Z)
|
||||
dL_dtheta += self._dKmm_dtheta
|
||||
dL_dtheta += self._dpsi1_dtheta
|
||||
return dL_dtheta
|
||||
|
||||
def dL_dZ(self):
|
||||
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
|
||||
dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
|
||||
dL_dZ = self.kern.gradients_X(self._dL_dpsi1.T,self.Z,self.X)
|
||||
dL_dZ += self.kern.gradients_X(self._dL_dKmm,X=self.Z)
|
||||
dL_dZ += self._dpsi1_dX
|
||||
dL_dZ += self._dKmm_dX
|
||||
return dL_dZ
|
||||
|
|
|
|||
|
|
@ -11,255 +11,199 @@
|
|||
#http://gaussianprocess.org/gpml/code.
|
||||
|
||||
import numpy as np
|
||||
import scipy as sp
|
||||
from likelihood import likelihood
|
||||
from ..util.linalg import mdot, jitchol, pddet, dpotrs
|
||||
from functools import partial as partial_func
|
||||
from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify
|
||||
from ...util.misc import param_to_array
|
||||
from posterior import Posterior
|
||||
import warnings
|
||||
from scipy import optimize
|
||||
|
||||
class LaplaceInference(object):
|
||||
"""Laplace approximation to a posterior"""
|
||||
|
||||
def __init__(self, data, noise_model, extra_data=None):
|
||||
def __init__(self):
|
||||
"""
|
||||
Laplace Approximation
|
||||
|
||||
Find the moments \hat{f} and the hessian at this point
|
||||
(using Newton-Raphson) of the unnormalised posterior
|
||||
|
||||
Compute the GP variables (i.e. generate some Y^{squiggle} and
|
||||
z^{squiggle} which makes a gaussian the same as the laplace
|
||||
approximation to the posterior, but normalised
|
||||
|
||||
Arguments
|
||||
---------
|
||||
|
||||
:param data: array of data the likelihood function is approximating
|
||||
:type data: NxD
|
||||
:param noise_model: likelihood function - subclass of noise_model
|
||||
:type noise_model: noise_model
|
||||
:param extra_data: additional data used by some likelihood functions,
|
||||
"""
|
||||
self.data = data
|
||||
self.noise_model = noise_model
|
||||
self.extra_data = extra_data
|
||||
|
||||
#Inital values
|
||||
self.N, self.D = self.data.shape
|
||||
self.is_heteroscedastic = True
|
||||
self.Nparams = 0
|
||||
self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))
|
||||
self._mode_finding_tolerance = 1e-7
|
||||
self._mode_finding_max_iter = 40
|
||||
self.bad_fhat = True
|
||||
self._previous_Ki_fhat = None
|
||||
|
||||
self.restart()
|
||||
likelihood.__init__(self)
|
||||
|
||||
def restart(self):
|
||||
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||
"""
|
||||
Reset likelihood variables to their defaults
|
||||
Returns a Posterior class containing essential quantities of the posterior
|
||||
"""
|
||||
#Initial values for the GP variables
|
||||
self.Y = np.zeros((self.N, 1))
|
||||
self.covariance_matrix = np.eye(self.N)
|
||||
self.precision = np.ones(self.N)[:, None]
|
||||
self.Z = 0
|
||||
self.YYT = None
|
||||
|
||||
self.old_Ki_f = None
|
||||
self.bad_fhat = False
|
||||
#make Y a normal array!
|
||||
Y = param_to_array(Y)
|
||||
|
||||
def predictive_values(self,mu,var,full_cov,**noise_args):
|
||||
if full_cov:
|
||||
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
|
||||
return self.noise_model.predictive_values(mu,var,**noise_args)
|
||||
|
||||
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||
"""
|
||||
Calculation of the log predictive density
|
||||
|
||||
.. math:
|
||||
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type mu_star: (Nx1) array
|
||||
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type var_star: (Nx1) array
|
||||
"""
|
||||
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
|
||||
|
||||
def _get_params(self):
|
||||
return np.asarray(self.noise_model._get_params())
|
||||
|
||||
def _get_param_names(self):
|
||||
return self.noise_model._get_param_names()
|
||||
|
||||
def _set_params(self, p):
|
||||
return self.noise_model._set_params(p)
|
||||
|
||||
def _shared_gradients_components(self):
|
||||
d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
|
||||
dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
|
||||
I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
|
||||
return dL_dfhat, I_KW_i
|
||||
|
||||
def _Kgradients(self):
|
||||
"""
|
||||
Gradients with respect to prior kernel parameters dL_dK to be chained
|
||||
with dK_dthetaK to give dL_dthetaK
|
||||
:returns: dL_dK matrix
|
||||
:rtype: Matrix (1 x num_kernel_params)
|
||||
"""
|
||||
dL_dfhat, I_KW_i = self._shared_gradients_components()
|
||||
dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)
|
||||
|
||||
#Explicit
|
||||
#expl_a = np.dot(self.Ki_f, self.Ki_f.T)
|
||||
#expl_b = self.Wi_K_i
|
||||
#expl = 0.5*expl_a - 0.5*expl_b
|
||||
#dL_dthetaK_exp = dK_dthetaK(expl, X)
|
||||
|
||||
#Implicit
|
||||
impl = mdot(dlp, dL_dfhat, I_KW_i)
|
||||
|
||||
#No longer required as we are computing these in the gp already
|
||||
#otherwise we would take them away and add them back
|
||||
#dL_dthetaK_imp = dK_dthetaK(impl, X)
|
||||
#dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
|
||||
#dL_dK = expl + impl
|
||||
|
||||
#No need to compute explicit as we are computing dZ_dK to account
|
||||
#for the difference between the K gradients of a normal GP,
|
||||
#and the K gradients including the implicit part
|
||||
dL_dK = impl
|
||||
return dL_dK
|
||||
|
||||
def _gradients(self, partial):
|
||||
"""
|
||||
Gradients with respect to likelihood parameters (dL_dthetaL)
|
||||
|
||||
:param partial: Not needed by this likelihood
|
||||
:type partial: lambda function
|
||||
:rtype: array of derivatives (1 x num_likelihood_params)
|
||||
"""
|
||||
dL_dfhat, I_KW_i = self._shared_gradients_components()
|
||||
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
|
||||
|
||||
#len(dlik_dthetaL)
|
||||
num_params = len(self._get_param_names())
|
||||
# make space for one derivative for each likelihood parameter
|
||||
dL_dthetaL = np.zeros(num_params)
|
||||
for thetaL_i in range(num_params):
|
||||
#Explicit
|
||||
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i])
|
||||
#- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
|
||||
+ np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
|
||||
)
|
||||
|
||||
#Implicit
|
||||
dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
|
||||
dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
|
||||
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
|
||||
|
||||
return dL_dthetaL
|
||||
|
||||
def _compute_GP_variables(self):
|
||||
"""
|
||||
Generate data Y which would give the normal distribution identical
|
||||
to the laplace approximation to the posterior, but normalised
|
||||
|
||||
GPy expects a likelihood to be gaussian, so need to caluclate
|
||||
the data Y^{\tilde} that makes the posterior match that found
|
||||
by a laplace approximation to a non-gaussian likelihood but with
|
||||
a gaussian likelihood
|
||||
|
||||
Firstly,
|
||||
The hessian of the unormalised posterior distribution is (K^{-1} + W)^{-1},
|
||||
i.e. z*N(f|f^{\hat}, (K^{-1} + W)^{-1}) but this assumes a non-gaussian likelihood,
|
||||
we wish to find the hessian \Sigma^{\tilde}
|
||||
that has the same curvature but using our new simulated data Y^{\tilde}
|
||||
i.e. we do N(Y^{\tilde}|f^{\hat}, \Sigma^{\tilde})N(f|0, K) = z*N(f|f^{\hat}, (K^{-1} + W)^{-1})
|
||||
and we wish to find what Y^{\tilde} and \Sigma^{\tilde}
|
||||
We find that Y^{\tilde} = W^{-1}(K^{-1} + W)f^{\hat} and \Sigma^{tilde} = W^{-1}
|
||||
|
||||
Secondly,
|
||||
GPy optimizes the log marginal log p(y) = -0.5*ln|K+\Sigma^{\tilde}| - 0.5*Y^{\tilde}^{T}(K^{-1} + \Sigma^{tilde})^{-1}Y + lik.Z
|
||||
So we can suck up any differences between that and our log marginal likelihood approximation
|
||||
p^{\squiggle}(y) = -0.5*f^{\hat}K^{-1}f^{\hat} + log p(y|f^{\hat}) - 0.5*log |K||K^{-1} + W|
|
||||
which we want to optimize instead, by equating them and rearranging, the difference is added onto
|
||||
the log p(y) that GPy optimizes by default
|
||||
|
||||
Thirdly,
|
||||
Since we have gradients that depend on how we move f^{\hat}, we have implicit components
|
||||
aswell as the explicit dL_dK, we hold these differences in dZ_dK and add them to dL_dK in the
|
||||
gp.py code
|
||||
"""
|
||||
Wi = 1.0/self.W
|
||||
self.Sigma_tilde = np.diagflat(Wi)
|
||||
|
||||
Y_tilde = Wi*self.Ki_f + self.f_hat
|
||||
|
||||
self.Wi_K_i = self.W12BiW12
|
||||
ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
|
||||
lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
|
||||
y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
|
||||
|
||||
Z_tilde = (+ lik
|
||||
- 0.5*self.ln_B_det
|
||||
+ 0.5*ln_det_Wi_K
|
||||
- 0.5*self.f_Ki_f
|
||||
+ 0.5*y_Wi_K_i_y
|
||||
+ self.NORMAL_CONST
|
||||
)
|
||||
|
||||
#Convert to float as its (1, 1) and Z must be a scalar
|
||||
self.Z = np.float64(Z_tilde)
|
||||
self.Y = Y_tilde
|
||||
self.YYT = np.dot(self.Y, self.Y.T)
|
||||
self.covariance_matrix = self.Sigma_tilde
|
||||
self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]
|
||||
|
||||
#Compute dZ_dK which is how the approximated distributions gradients differ from the dL_dK computed for other likelihoods
|
||||
self.dZ_dK = self._Kgradients()
|
||||
#+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the K gradients explicit part theres no need to compute this again
|
||||
|
||||
def fit_full(self, K):
|
||||
"""
|
||||
The laplace approximation algorithm, find K and expand hessian
|
||||
For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
|
||||
|
||||
:param K: Prior covariance matrix evaluated at locations X
|
||||
:type K: NxN matrix
|
||||
"""
|
||||
self.K = K.copy()
|
||||
# Compute K
|
||||
K = kern.K(X)
|
||||
|
||||
#Find mode
|
||||
self.f_hat = self.rasm_mode(self.K)
|
||||
if self.bad_fhat:
|
||||
Ki_f_init = np.zeros_like(Y)
|
||||
else:
|
||||
Ki_f_init = self._previous_Ki_fhat
|
||||
|
||||
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
|
||||
|
||||
#Compute hessian and other variables at mode
|
||||
self._compute_likelihood_variables()
|
||||
log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
|
||||
|
||||
#Compute fake variables replicating laplace approximation to posterior
|
||||
self._compute_GP_variables()
|
||||
kern.update_gradients_full(dL_dK, X)
|
||||
likelihood.update_gradients(dL_dthetaL)
|
||||
|
||||
def _compute_likelihood_variables(self):
|
||||
self._previous_Ki_fhat = Ki_fhat.copy()
|
||||
return Posterior(woodbury_vector=woodbury_vector, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK}
|
||||
|
||||
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
|
||||
"""
|
||||
Compute the variables required to compute gaussian Y variables
|
||||
Rasmussen's numerically stable mode finding
|
||||
For nomenclature see Rasmussen & Williams 2006
|
||||
Influenced by GPML (BSD) code, all errors are our own
|
||||
|
||||
:param K: Covariance matrix evaluated at locations X
|
||||
:type K: NxD matrix
|
||||
:param Y: The data
|
||||
:type Y: np.ndarray
|
||||
:param likelihood: the likelihood of the latent function value for the given data
|
||||
:type likelihood: a GPy.likelihood object
|
||||
:param Ki_f_init: the initial guess at the mode
|
||||
:type Ki_f_init: np.ndarray
|
||||
:param Y_metadata: information about the data, e.g. which likelihood to take from a multi-likelihood object
|
||||
:type Y_metadata: np.ndarray | None
|
||||
:returns: f_hat, mode on which to make laplace approxmiation
|
||||
:rtype: np.ndarray
|
||||
"""
|
||||
|
||||
Ki_f = Ki_f_init.copy()
|
||||
f = np.dot(K, Ki_f)
|
||||
|
||||
#define the objective function (to be maximised)
|
||||
def obj(Ki_f, f):
|
||||
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata)
|
||||
|
||||
difference = np.inf
|
||||
iteration = 0
|
||||
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
|
||||
W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata)
|
||||
|
||||
W_f = W*f
|
||||
grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata)
|
||||
|
||||
b = W_f + grad # R+W p46 line 6.
|
||||
#W12BiW12Kb, B_logdet = self._compute_B_statistics(K, W.copy(), np.dot(K, b), likelihood.log_concave)
|
||||
W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))
|
||||
|
||||
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
|
||||
full_step_Ki_f = b - W12BiW12Kb # full_step_Ki_f = a in R&W p46 line 6.
|
||||
dKi_f = full_step_Ki_f - Ki_f
|
||||
|
||||
#define an objective for the line search (minimize this one)
|
||||
def inner_obj(step_size):
|
||||
Ki_f_trial = Ki_f + step_size*dKi_f
|
||||
f_trial = np.dot(K, Ki_f_trial)
|
||||
return -obj(Ki_f_trial, f_trial)
|
||||
|
||||
#use scipy for the line search, the compute new values of f, Ki_f
|
||||
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
|
||||
Ki_f_new = Ki_f + step*dKi_f
|
||||
f_new = np.dot(K, Ki_f_new)
|
||||
|
||||
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
|
||||
Ki_f = Ki_f_new
|
||||
f = f_new
|
||||
iteration += 1
|
||||
|
||||
#Warn of bad fits
|
||||
if difference > self._mode_finding_tolerance:
|
||||
if not self.bad_fhat:
|
||||
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
|
||||
self.bad_fhat = True
|
||||
elif self.bad_fhat:
|
||||
self.bad_fhat = False
|
||||
warnings.warn("f_hat now fine again")
|
||||
|
||||
return f, Ki_f
|
||||
|
||||
def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
|
||||
"""
|
||||
At the mode, compute the hessian and effective covariance matrix.
|
||||
|
||||
returns: logZ : approximation to the marginal likelihood
|
||||
woodbury_vector : variable required for calculating the approximation to the covariance matrix
|
||||
woodbury_inv : variable required for calculating the approximation to the covariance matrix
|
||||
dL_dthetaL : array of derivatives (1 x num_kernel_params)
|
||||
dL_dthetaL : array of derivatives (1 x num_likelihood_params)
|
||||
"""
|
||||
#At this point get the hessian matrix (or vector as W is diagonal)
|
||||
self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
|
||||
W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata)
|
||||
|
||||
if not self.noise_model.log_concave:
|
||||
#print "Under 1e-10: {}".format(np.sum(self.W < 1e-6))
|
||||
self.W[self.W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
||||
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
|
||||
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
|
||||
#compute vital matrices
|
||||
C = np.dot(LiW12, K)
|
||||
Ki_W_i = K - C.T.dot(C)
|
||||
|
||||
self.Ki_f = self.Ki_f
|
||||
self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
|
||||
self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)
|
||||
#compute the log marginal
|
||||
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L)))
|
||||
|
||||
def _compute_B_statistics(self, K, W, a):
|
||||
#Compute vival matrices for derivatives
|
||||
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat
|
||||
woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
|
||||
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
|
||||
#BiK, _ = dpotrs(L, K, lower=1)
|
||||
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
|
||||
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
|
||||
|
||||
####################
|
||||
#compute dL_dK#
|
||||
####################
|
||||
if kern.size > 0 and not kern.is_fixed:
|
||||
#Explicit
|
||||
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
|
||||
|
||||
#Implicit
|
||||
implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i)
|
||||
|
||||
dL_dK = explicit_part + implicit_part
|
||||
else:
|
||||
dL_dK = np.zeros(likelihood.size)
|
||||
|
||||
####################
|
||||
#compute dL_dthetaL#
|
||||
####################
|
||||
if likelihood.size > 0 and not likelihood.is_fixed:
|
||||
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata)
|
||||
|
||||
num_params = likelihood.size
|
||||
# make space for one derivative for each likelihood parameter
|
||||
dL_dthetaL = np.zeros(num_params)
|
||||
for thetaL_i in range(num_params):
|
||||
#Explicit
|
||||
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
|
||||
# The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
|
||||
+ 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
|
||||
)
|
||||
|
||||
#Implicit
|
||||
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
|
||||
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
|
||||
dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
|
||||
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
|
||||
|
||||
else:
|
||||
dL_dthetaL = np.zeros(likelihood.size)
|
||||
|
||||
return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL
|
||||
|
||||
def _compute_B_statistics(self, K, W, log_concave):
|
||||
"""
|
||||
Rasmussen suggests the use of a numerically stable positive definite matrix B
|
||||
Which has a positive diagonal element and can be easyily inverted
|
||||
|
|
@ -268,136 +212,28 @@ class LaplaceInference(object):
|
|||
:type K: NxN matrix
|
||||
:param W: Negative hessian at a point (diagonal matrix)
|
||||
:type W: Vector of diagonal values of hessian (1xN)
|
||||
:param a: Matrix to calculate W12BiW12a
|
||||
:type a: Matrix NxN
|
||||
:returns: (W12BiW12, ln_B_det)
|
||||
:returns: (W12BiW12, L_B, Li_W12)
|
||||
"""
|
||||
if not self.noise_model.log_concave:
|
||||
if not log_concave:
|
||||
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
|
||||
W[W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
||||
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
|
||||
# To cause the posterior to become less certain than the prior and likelihood,
|
||||
# This is a property only held by non-log-concave likelihoods
|
||||
|
||||
|
||||
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
|
||||
W_12 = np.sqrt(W)
|
||||
B = np.eye(self.N) + W_12*K*W_12.T
|
||||
B = np.eye(K.shape[0]) + W_12*K*W_12.T
|
||||
L = jitchol(B)
|
||||
|
||||
W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
|
||||
ln_B_det = 2*np.sum(np.log(np.diag(L)))
|
||||
return W12BiW12a, ln_B_det
|
||||
LiW12, _ = dtrtrs(L, np.diagflat(W_12), lower=1, trans=0)
|
||||
K_Wi_i = np.dot(LiW12.T, LiW12) # R = W12BiW12, in R&W p 126, eq 5.25
|
||||
|
||||
def rasm_mode(self, K, MAX_ITER=40):
|
||||
"""
|
||||
Rasmussen's numerically stable mode finding
|
||||
For nomenclature see Rasmussen & Williams 2006
|
||||
Influenced by GPML (BSD) code, all errors are our own
|
||||
#here's a better way to compute the required matrix.
|
||||
# you could do the model finding witha backsub, instead of a dot...
|
||||
#L2 = L/W_12
|
||||
#K_Wi_i_2 , _= dpotri(L2)
|
||||
#symmetrify(K_Wi_i_2)
|
||||
|
||||
:param K: Covariance matrix evaluated at locations X
|
||||
:type K: NxD matrix
|
||||
:param MAX_ITER: Maximum number of iterations of newton-raphson before forcing finish of optimisation
|
||||
:type MAX_ITER: scalar
|
||||
:returns: f_hat, mode on which to make laplace approxmiation
|
||||
:rtype: NxD matrix
|
||||
"""
|
||||
#old_Ki_f = np.zeros((self.N, 1))
|
||||
return K_Wi_i, L, LiW12
|
||||
|
||||
#Start f's at zero originally of if we have gone off track, try restarting
|
||||
if self.old_Ki_f is None or self.bad_fhat:
|
||||
old_Ki_f = np.random.rand(self.N, 1)/50.0
|
||||
#old_Ki_f = self.Y
|
||||
f = np.dot(K, old_Ki_f)
|
||||
else:
|
||||
#Start at the old best point
|
||||
old_Ki_f = self.old_Ki_f.copy()
|
||||
f = self.f_hat.copy()
|
||||
|
||||
new_obj = -np.inf
|
||||
old_obj = np.inf
|
||||
|
||||
def obj(Ki_f, f):
|
||||
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
|
||||
|
||||
difference = np.inf
|
||||
epsilon = 1e-7
|
||||
#step_size = 1
|
||||
#rs = 0
|
||||
i = 0
|
||||
|
||||
while difference > epsilon and i < MAX_ITER:
|
||||
W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)
|
||||
|
||||
W_f = W*f
|
||||
grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)
|
||||
|
||||
b = W_f + grad
|
||||
W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))
|
||||
|
||||
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
|
||||
full_step_Ki_f = b - W12BiW12Kb
|
||||
dKi_f = full_step_Ki_f - old_Ki_f
|
||||
|
||||
f_old = f.copy()
|
||||
def inner_obj(step_size, old_Ki_f, dKi_f, K):
|
||||
Ki_f = old_Ki_f + step_size*dKi_f
|
||||
f = np.dot(K, Ki_f)
|
||||
# This is nasty, need to set something within an optimization though
|
||||
self.tmp_Ki_f = Ki_f.copy()
|
||||
self.tmp_f = f.copy()
|
||||
return -obj(Ki_f, f)
|
||||
|
||||
i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
|
||||
#Find the stepsize that minimizes the objective function using a brent line search
|
||||
#The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
|
||||
#steps than get this exact then make a step, if B was bigger it might be the other way around though
|
||||
#new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
|
||||
new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10)
|
||||
f = self.tmp_f.copy()
|
||||
Ki_f = self.tmp_Ki_f.copy()
|
||||
|
||||
#Optimize without linesearch
|
||||
#f_old = f.copy()
|
||||
#update_passed = False
|
||||
#while not update_passed:
|
||||
#Ki_f = old_Ki_f + step_size*dKi_f
|
||||
#f = np.dot(K, Ki_f)
|
||||
|
||||
#old_obj = new_obj
|
||||
#new_obj = obj(Ki_f, f)
|
||||
#difference = new_obj - old_obj
|
||||
##print "difference: ",difference
|
||||
#if difference < 0:
|
||||
##print "Objective function rose", np.float(difference)
|
||||
##If the objective function isn't rising, restart optimization
|
||||
#step_size *= 0.8
|
||||
##print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
|
||||
##objective function isn't increasing, try reducing step size
|
||||
#f = f_old.copy() #it's actually faster not to go back to old location and just zigzag across the mode
|
||||
#old_obj = new_obj
|
||||
#rs += 1
|
||||
#else:
|
||||
#update_passed = True
|
||||
|
||||
#old_Ki_f = self.Ki_f.copy()
|
||||
|
||||
#difference = abs(new_obj - old_obj)
|
||||
#old_obj = new_obj.copy()
|
||||
difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
|
||||
#difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
|
||||
old_Ki_f = Ki_f.copy()
|
||||
i += 1
|
||||
|
||||
self.old_Ki_f = old_Ki_f.copy()
|
||||
|
||||
#Warn of bad fits
|
||||
if difference > epsilon:
|
||||
self.bad_fhat = True
|
||||
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
|
||||
elif self.bad_fhat:
|
||||
self.bad_fhat = False
|
||||
warnings.warn("f_hat now perfect again")
|
||||
|
||||
self.Ki_f = Ki_f
|
||||
return f
|
||||
|
|
|
|||
|
|
@ -2,19 +2,20 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs
|
||||
from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify
|
||||
|
||||
class Posterior(object):
|
||||
"""
|
||||
An object to represent a Gaussian posterior over latent function values.
|
||||
An object to represent a Gaussian posterior over latent function values, p(f|D).
|
||||
This may be computed exactly for Gaussian likelihoods, or approximated for
|
||||
non-Gaussian likelihoods.
|
||||
|
||||
The purpose of this class is to serve as an interface between the inference
|
||||
schemes and the model classes.
|
||||
schemes and the model classes. the model class can make predictions for
|
||||
the function at any new point x_* by integrating over this posterior.
|
||||
|
||||
"""
|
||||
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
|
||||
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None):
|
||||
"""
|
||||
woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
|
||||
woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
|
||||
|
|
@ -45,7 +46,10 @@ class Posterior(object):
|
|||
#obligatory
|
||||
self._K = K
|
||||
|
||||
if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)):
|
||||
if ((woodbury_chol is not None) and (woodbury_vector is not None))\
|
||||
or ((woodbury_inv is not None) and (woodbury_vector is not None))\
|
||||
or ((woodbury_inv is not None) and (mean is not None))\
|
||||
or ((mean is not None) and (cov is not None)):
|
||||
pass # we have sufficient to compute the posterior
|
||||
else:
|
||||
raise ValueError, "insufficient information to compute the posterior"
|
||||
|
|
@ -56,6 +60,10 @@ class Posterior(object):
|
|||
self._woodbury_chol = woodbury_chol
|
||||
self._woodbury_vector = woodbury_vector
|
||||
|
||||
#option 2.
|
||||
self._woodbury_inv = woodbury_inv
|
||||
#and woodbury vector
|
||||
|
||||
#option 2:
|
||||
self._mean = mean
|
||||
self._covariance = cov
|
||||
|
|
@ -66,7 +74,7 @@ class Posterior(object):
|
|||
@property
|
||||
def mean(self):
|
||||
if self._mean is None:
|
||||
self._mean = np.dot(self._K, self._woodbury_vector)
|
||||
self._mean = np.dot(self._K, self.woodbury_vector)
|
||||
return self._mean
|
||||
|
||||
@property
|
||||
|
|
@ -85,16 +93,27 @@ class Posterior(object):
|
|||
@property
|
||||
def woodbury_chol(self):
|
||||
if self._woodbury_chol is None:
|
||||
B = self._K - self._covariance
|
||||
tmp, _ = dpotrs(self._K_chol, B)
|
||||
Wi, _ = dpotrs(self._K_chol, tmp.T)
|
||||
_, _, self._woodbury_chol, _ = pdinv(Wi)
|
||||
#try computing woodbury chol from cov
|
||||
if self._woodbury_inv is not None:
|
||||
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
|
||||
elif self._covariance is not None:
|
||||
B = self._K - self._covariance
|
||||
tmp, _ = dpotrs(self.K_chol, B)
|
||||
self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
|
||||
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
|
||||
return self._woodbury_chol
|
||||
|
||||
@property
|
||||
def woodbury_inv(self):
|
||||
if self._woodbury_inv is None:
|
||||
self._woodbury_inv, _ = dpotri(self.woodbury_chol)
|
||||
symmetrify(self._woodbury_inv)
|
||||
return self._woodbury_inv
|
||||
|
||||
@property
|
||||
def woodbury_vector(self):
|
||||
if self._woodbury_vector is None:
|
||||
self._woodbury_vector, _ = dpotrs(self._K_chol, self.mean)
|
||||
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
|
||||
return self._woodbury_vector
|
||||
|
||||
@property
|
||||
|
|
|
|||
|
|
@ -2,8 +2,10 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from posterior import Posterior
|
||||
from ...util.linalg import pdinv, dpotrs, tdot
|
||||
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
|
||||
import numpy as np
|
||||
from ...util.caching import Cacher
|
||||
from ...util.misc import param_to_array
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class VarDTC(object):
|
||||
|
|
@ -17,10 +19,15 @@ class VarDTC(object):
|
|||
|
||||
"""
|
||||
def __init__(self):
|
||||
self._YYTfactor_cache = caching.cache()
|
||||
#self._YYTfactor_cache = caching.cache()
|
||||
self.const_jitter = 1e-6
|
||||
self.get_trYYT = Cacher(self._get_trYYT, 1)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, 1)
|
||||
|
||||
def _get_trYYT(self, Y):
|
||||
return param_to_array(np.sum(np.square(Y)))
|
||||
|
||||
def get_YYTfactor(self, Y):
|
||||
def _get_YYTfactor(self, Y):
|
||||
"""
|
||||
find a matrix L which satisfies LLT = YYT.
|
||||
|
||||
|
|
@ -28,11 +35,10 @@ class VarDTC(object):
|
|||
"""
|
||||
N, D = Y.shape
|
||||
if (N>D):
|
||||
return Y
|
||||
return param_to_array(Y)
|
||||
else:
|
||||
#if Y in self.cache, return self.Cache[Y], else stor Y in cache and return L.
|
||||
raise NotImplementedError, 'TODO' #TODO
|
||||
|
||||
return jitchol(tdot(Y))
|
||||
|
||||
def get_VVTfactor(self, Y, prec):
|
||||
return Y * prec # TODO chache this, and make it effective
|
||||
|
||||
|
|
@ -42,7 +48,13 @@ class VarDTC(object):
|
|||
num_data, output_dim = Y.shape
|
||||
|
||||
#see whether we're using variational uncertain inputs
|
||||
uncertain_inputs = (X_variance is None)
|
||||
uncertain_inputs = not (X_variance is None)
|
||||
|
||||
#see whether we've got a different noise variance for each datum
|
||||
beta = 1./np.squeeze(likelihood.variance)
|
||||
het_noise = False
|
||||
if beta.size <1:
|
||||
het_noise = True
|
||||
|
||||
# kernel computations, using BGPLVM notation
|
||||
Kmm = kern.K(Z)
|
||||
|
|
@ -59,30 +71,38 @@ class VarDTC(object):
|
|||
|
||||
# The rather complex computations of A
|
||||
if uncertain_inputs:
|
||||
if likelihood.is_heteroscedastic:
|
||||
psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
|
||||
if het_noise:
|
||||
psi2_beta = (psi2 * (beta.flatten().reshape(num_data, 1, 1))).sum(0)
|
||||
else:
|
||||
psi2_beta = psi2.sum(0) * likelihood.precision
|
||||
evals, evecs = linalg.eigh(psi2_beta)
|
||||
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
|
||||
if not np.array_equal(evals, clipped_evals):
|
||||
pass # print evals
|
||||
tmp = evecs * np.sqrt(clipped_evals)
|
||||
tmp = tmp.T
|
||||
psi2_beta = psi2.sum(0) * beta
|
||||
if 0:
|
||||
evals, evecs = linalg.eigh(psi2_beta)
|
||||
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
|
||||
if not np.array_equal(evals, clipped_evals):
|
||||
pass # print evals
|
||||
tmp = evecs * np.sqrt(clipped_evals)
|
||||
tmp = tmp.T
|
||||
# no backsubstitution because of bound explosion on tr(A) if not...
|
||||
LmInv, _ = dtrtri(Lm, lower=1)
|
||||
A = LmInv.dot(psi2_beta.dot(LmInv.T))
|
||||
#print A.sum()
|
||||
else:
|
||||
if likelihood.is_heteroscedastic:
|
||||
tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
|
||||
if het_noise:
|
||||
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
|
||||
else:
|
||||
tmp = psi1 * (np.sqrt(likelihood.precision))
|
||||
tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
|
||||
A = tdot(tmp)
|
||||
tmp = psi1 * (np.sqrt(beta))
|
||||
tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
|
||||
A = tdot(tmp)
|
||||
|
||||
# factor B
|
||||
B = np.eye(num_inducing) + A
|
||||
self.A = A
|
||||
LB = jitchol(B)
|
||||
|
||||
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
|
||||
VVT_factor = self.get_VVTfactor(Y, likelihood.precision)
|
||||
self.YYTfactor = self.get_YYTfactor(Y)
|
||||
VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
|
||||
trYYT = self.get_trYYT(Y)
|
||||
psi1Vf = np.dot(psi1.T, VVT_factor)
|
||||
|
||||
# back substutue C into psi1Vf
|
||||
|
|
@ -91,17 +111,6 @@ class VarDTC(object):
|
|||
tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
|
||||
Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)
|
||||
|
||||
#compute log marginal likelihood
|
||||
if likelihood.is_heteroscedastic:
|
||||
A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
|
||||
B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
|
||||
else:
|
||||
A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
|
||||
B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
|
||||
C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
|
||||
D = 0.5 * data_fit
|
||||
log_marginal = A + B + C + D
|
||||
|
||||
|
||||
# Compute dL_dKmm
|
||||
tmp = tdot(_LBi_Lmi_psi1Vf)
|
||||
|
|
@ -112,21 +121,20 @@ class VarDTC(object):
|
|||
tmp += output_dim * np.eye(num_inducing)
|
||||
dL_dKmm = backsub_both_sides(Lm, tmp)
|
||||
|
||||
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
|
||||
dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
|
||||
dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
|
||||
# Compute dL_dpsi
|
||||
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
|
||||
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
|
||||
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
|
||||
|
||||
if likelihood.is_heteroscedastic:
|
||||
|
||||
if has_uncertain_inputs:
|
||||
dL_dpsi2 = likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
|
||||
if het_noise:
|
||||
if uncertain_inputs:
|
||||
dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
|
||||
else:
|
||||
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * likelihood.precision.reshape(num_data, 1)).T).T
|
||||
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T
|
||||
dL_dpsi2 = None
|
||||
else:
|
||||
dL_dpsi2 = likelihood.precision * dL_dpsi2_beta
|
||||
if has_uncertain_inputs:
|
||||
dL_dpsi2 = beta * dL_dpsi2_beta
|
||||
if uncertain_inputs:
|
||||
# repeat for each of the N psi_2 matrices
|
||||
dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
|
||||
else:
|
||||
|
|
@ -139,41 +147,66 @@ class VarDTC(object):
|
|||
if likelihood.size == 0:
|
||||
# save computation here.
|
||||
partial_for_likelihood = None
|
||||
elif likelihood.is_heteroscedastic:
|
||||
|
||||
if has_uncertain_inputs:
|
||||
elif het_noise:
|
||||
if uncertain_inputs:
|
||||
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
|
||||
|
||||
else:
|
||||
|
||||
LBi = chol_inv(LB)
|
||||
Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
|
||||
_LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
|
||||
|
||||
partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2
|
||||
partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
|
||||
|
||||
partial_for_likelihood = -0.5 * likelihood.precision + 0.5 * likelihood.V**2
|
||||
partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * likelihood.precision**2
|
||||
partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
|
||||
|
||||
partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*likelihood.precision**2
|
||||
|
||||
partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * likelihood.precision**2
|
||||
partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * likelihood.precision**2
|
||||
partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
|
||||
partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
|
||||
|
||||
else:
|
||||
# likelihood is not heteroscedatic
|
||||
partial_for_likelihood = -0.5 * num_data * output_dim * likelihood.precision + 0.5 * likelihood.trYYT * likelihood.precision ** 2
|
||||
partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(_A) * likelihood.precision)
|
||||
partial_for_likelihood += likelihood.precision * (0.5 * np.sum(_A * DBi_plus_BiPBi) - data_fit)
|
||||
partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
|
||||
partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
|
||||
partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
|
||||
|
||||
#compute log marginal likelihood
|
||||
if het_noise:
|
||||
lik_1 = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
|
||||
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
|
||||
else:
|
||||
lik_1 = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(beta)) - 0.5 * beta * trYYT
|
||||
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
|
||||
lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
|
||||
lik_4 = 0.5 * data_fit
|
||||
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
|
||||
|
||||
#put the gradients in the right places
|
||||
likelihood.update_gradients(partial_for_likelihood)
|
||||
|
||||
if uncertain_inputs:
|
||||
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}
|
||||
kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
|
||||
else:
|
||||
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1}
|
||||
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
|
||||
|
||||
#get sufficient things for posterior prediction
|
||||
#TODO: do we really want to do this in the loop?
|
||||
if VVT_factor.shape[1] == Y.shape[1]:
|
||||
Cpsi1V = Cpsi1Vf
|
||||
woodbury_vector = Cpsi1Vf # == Cpsi1V
|
||||
else:
|
||||
raise NotImplementedError #TODO
|
||||
psi1V = np.dot(Y.T*beta, psi1).T
|
||||
tmp, _ = dtrtrs(Lm, np.asfortranarray(psi1V), lower=1, trans=0)
|
||||
tmp, _ = dpotrs(LB, tmp, lower=1)
|
||||
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
|
||||
Bi, _ = dpotri(LB, lower=0)
|
||||
symmetrify(Bi)
|
||||
woodbury_inv = backsub_both_sides(Lm, np.eye(num_inducing) - Bi)
|
||||
|
||||
|
||||
#construct a posterior object
|
||||
post = Posterior(woodbury_chol=None, woodbury_vector=Cpsi1V, K=None, mean=None, cov=None, K_chol=None)
|
||||
return
|
||||
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
|
||||
|
||||
return post, log_marginal, grad_dict
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -69,8 +69,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
|
|||
success = True # Force calculation of directional derivs.
|
||||
nsuccess = 0 # nsuccess counts number of successes.
|
||||
beta = 1.0 # Initial scale parameter.
|
||||
betamin = 1.0e-60 # Lower bound on scale.
|
||||
betamax = 1.0e50 # Upper bound on scale.
|
||||
betamin = 1.0e-15 # Lower bound on scale.
|
||||
betamax = 1.0e15 # Upper bound on scale.
|
||||
status = "Not converged"
|
||||
|
||||
flog = [fold]
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from parts.prod import Prod as prod
|
|||
from parts.linear import Linear
|
||||
from parts.kernpart import Kernpart
|
||||
from ..core.parameterization import Parameterized
|
||||
from GPy.core.parameterization.param import Param
|
||||
|
||||
class kern(Parameterized):
|
||||
def __init__(self, input_dim, parts=[], input_slices=None):
|
||||
|
|
@ -84,7 +85,7 @@ class kern(Parameterized):
|
|||
# represents the gradient in the transformed space (i.e. that given by
|
||||
# get_params_transformed())
|
||||
#
|
||||
# :param g: the gradient vector for the current model, usually created by dK_dtheta
|
||||
# :param g: the gradient vector for the current model, usually created by _param_grad_helper
|
||||
# """
|
||||
# x = self._get_params()
|
||||
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
|
||||
|
|
@ -152,14 +153,16 @@ class kern(Parameterized):
|
|||
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
|
||||
# newkern.fixed_values = self.fixed_values + other.fixed_values
|
||||
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
|
||||
[newkern._add_constrain(param, transform, warning=False)
|
||||
for param, transform in itertools.izip(
|
||||
*itertools.chain(self.constraints.iteritems(),
|
||||
other.constraints.iteritems()))]
|
||||
|
||||
[newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()]
|
||||
[newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()]
|
||||
newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None
|
||||
|
||||
return newkern
|
||||
|
||||
def __call__(self, X, X2=None):
|
||||
return self.K(X, X2)
|
||||
|
||||
def __mul__(self, other):
|
||||
""" Here we overload the '*' operator. See self.prod for more information"""
|
||||
return self.prod(other)
|
||||
|
|
@ -180,8 +183,10 @@ class kern(Parameterized):
|
|||
:type tensor: bool
|
||||
|
||||
"""
|
||||
K1 = self.copy()
|
||||
K2 = other.copy()
|
||||
K1 = self
|
||||
K2 = other
|
||||
#K1 = self.copy()
|
||||
#K2 = other.copy()
|
||||
|
||||
slices = []
|
||||
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
|
||||
|
|
@ -284,11 +289,12 @@ class kern(Parameterized):
|
|||
[p.update_gradients_full(dL_dK, X) for p in self._parameters_]
|
||||
|
||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||
raise NotImplementedError
|
||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||
raise NotImplementedError
|
||||
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2=None):
|
||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
|
||||
|
||||
def _param_grad_helper(self, dL_dK, X, X2=None):
|
||||
"""
|
||||
Compute the gradient of the covariance function with respect to the parameters.
|
||||
|
||||
|
|
@ -304,9 +310,9 @@ class kern(Parameterized):
|
|||
assert X.shape[1] == self.input_dim
|
||||
target = np.zeros(self.size)
|
||||
if X2 is None:
|
||||
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||
[p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||
else:
|
||||
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||
[p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
|
||||
|
||||
return self._transform_gradients(target)
|
||||
|
||||
|
|
@ -478,6 +484,7 @@ from GPy.core.model import Model
|
|||
class Kern_check_model(Model):
|
||||
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
|
||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||
Model.__init__(self, 'kernel_test_model')
|
||||
num_samples = 20
|
||||
num_samples2 = 10
|
||||
if kernel==None:
|
||||
|
|
@ -489,14 +496,12 @@ class Kern_check_model(Model):
|
|||
dL_dK = np.ones((X.shape[0], X.shape[0]))
|
||||
else:
|
||||
dL_dK = np.ones((X.shape[0], X2.shape[0]))
|
||||
|
||||
|
||||
self.kernel=kernel
|
||||
self.add_parameter(kernel)
|
||||
self.X = X
|
||||
self.X2 = X2
|
||||
self.dL_dK = dL_dK
|
||||
#self.constrained_indices=[]
|
||||
#self.constraints=[]
|
||||
Model.__init__(self, 'kernel_test_model')
|
||||
|
||||
def is_positive_definite(self):
|
||||
v = np.linalg.eig(self.kernel.K(self.X))[0]
|
||||
|
|
@ -505,15 +510,6 @@ class Kern_check_model(Model):
|
|||
else:
|
||||
return True
|
||||
|
||||
def _get_params(self):
|
||||
return self.kernel._get_params()
|
||||
|
||||
def _get_param_names(self):
|
||||
return self.kernel._get_param_names()
|
||||
|
||||
def _set_params(self, x):
|
||||
self.kernel._set_params(x)
|
||||
|
||||
def log_likelihood(self):
|
||||
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
|
||||
|
||||
|
|
@ -526,7 +522,7 @@ class Kern_check_dK_dtheta(Kern_check_model):
|
|||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
|
||||
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
|
||||
|
||||
class Kern_check_dKdiag_dtheta(Kern_check_model):
|
||||
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
|
||||
|
|
@ -534,6 +530,8 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
|
|||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
||||
if dL_dK==None:
|
||||
self.dL_dK = np.ones((self.X.shape[0]))
|
||||
def parameters_changed(self):
|
||||
self.kernel.update_gradients_full(self.dL_dK, self.X)
|
||||
|
||||
def log_likelihood(self):
|
||||
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
|
||||
|
|
@ -545,41 +543,25 @@ class Kern_check_dK_dX(Kern_check_model):
|
|||
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
|
||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
|
||||
|
||||
self.remove_parameter(kernel)
|
||||
self.X = Param('X', self.X)
|
||||
self.add_parameter(self.X)
|
||||
def _log_likelihood_gradients(self):
|
||||
return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
|
||||
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
|
||||
|
||||
def _get_param_names(self):
|
||||
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
|
||||
|
||||
def _get_params(self):
|
||||
return self.X.flatten()
|
||||
|
||||
def _set_params(self, x):
|
||||
self.X=x.reshape(self.X.shape)
|
||||
|
||||
class Kern_check_dKdiag_dX(Kern_check_model):
|
||||
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
|
||||
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
|
||||
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
|
||||
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
||||
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
|
||||
if dL_dK==None:
|
||||
self.dL_dK = np.ones((self.X.shape[0]))
|
||||
|
||||
|
||||
def log_likelihood(self):
|
||||
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
|
||||
|
||||
def _get_param_names(self):
|
||||
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
|
||||
|
||||
def _get_params(self):
|
||||
return self.X.flatten()
|
||||
|
||||
def _set_params(self, x):
|
||||
self.X=x.reshape(self.X.shape)
|
||||
|
||||
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
||||
"""
|
||||
This function runs on kernels to check the correctness of their
|
||||
|
|
@ -654,7 +636,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
|||
except NotImplementedError:
|
||||
result=True
|
||||
if verbose:
|
||||
print("dK_dX not implemented for " + kern.name)
|
||||
print("gradients_X not implemented for " + kern.name)
|
||||
if result and verbose:
|
||||
print("Check passed.")
|
||||
if not result:
|
||||
|
|
@ -670,7 +652,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
|||
except NotImplementedError:
|
||||
result=True
|
||||
if verbose:
|
||||
print("dK_dX not implemented for " + kern.name)
|
||||
print("gradients_X not implemented for " + kern.name)
|
||||
if result and verbose:
|
||||
print("Check passed.")
|
||||
if not result:
|
||||
|
|
@ -686,7 +668,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
|
|||
except NotImplementedError:
|
||||
result=True
|
||||
if verbose:
|
||||
print("dK_dX not implemented for " + kern.name)
|
||||
print("gradients_X not implemented for " + kern.name)
|
||||
if result and verbose:
|
||||
print("Check passed.")
|
||||
if not result:
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ class Brownian(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
target += self.variance*X.flatten()
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
target += np.sum(np.fmin(X,X2.T)*dL_dK)
|
||||
|
|
@ -51,7 +51,7 @@ class Brownian(Kernpart):
|
|||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||
target += np.dot(X.flatten(), dL_dKdiag)
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
raise NotImplementedError, "TODO"
|
||||
#target += self.variance
|
||||
#target -= self.variance*theta(X-X2.T)
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ class Matern32(Kernpart):
|
|||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
np.add(target, self.variance, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
||||
|
|
@ -96,7 +96,7 @@ class Matern32(Kernpart):
|
|||
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
|
||||
target[0] += np.sum(dL_dKdiag)
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
if X2 is None:
|
||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
|
||||
|
|
@ -105,8 +105,8 @@ class Matern32(Kernpart):
|
|||
else:
|
||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
|
||||
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
|
||||
dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
|
||||
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
|
||||
gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
|
||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ class Matern52(Kernpart):
|
|||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
np.add(target,self.variance,target)
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
|
||||
|
|
@ -96,7 +96,7 @@ class Matern52(Kernpart):
|
|||
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
|
||||
target[0] += np.sum(dL_dKdiag)
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
if X2 is None:
|
||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
|
||||
|
|
@ -104,8 +104,8 @@ class Matern52(Kernpart):
|
|||
else:
|
||||
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
|
||||
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
|
||||
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
|
||||
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
|
||||
gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
|
||||
target += np.sum(gradients_X*dL_dK.T[:,:,None],0)
|
||||
|
||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ class ODE_1(Kernpart):
|
|||
|
||||
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.abs(X - X2.T)
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ class Eq_ode1(Kernpart):
|
|||
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
|
||||
pass
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
|
||||
# First extract times and indices.
|
||||
self._extract_t_indices(X, X2, dL_dK=dL_dK)
|
||||
|
|
@ -193,7 +193,7 @@ class Eq_ode1(Kernpart):
|
|||
def dKdiag_dtheta(self,dL_dKdiag,index,target):
|
||||
pass
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
pass
|
||||
|
||||
def _extract_t_indices(self, X, X2=None, dL_dK=None):
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class Exponential(Kernpart):
|
|||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
np.add(target, self.variance, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
|
||||
|
|
@ -95,13 +95,13 @@ class Exponential(Kernpart):
|
|||
# NB: derivative of diagonal elements wrt lengthscale is 0
|
||||
target[0] += np.sum(dL_dKdiag)
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
if X2 is None: X2 = X
|
||||
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
|
||||
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
|
||||
dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
|
||||
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
|
||||
gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
|
||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class FiniteDimensional(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
product = np.diag(self.K(X, X))
|
||||
np.add(target,product,target)
|
||||
def dK_dtheta(self,X,X2,target):
|
||||
def _param_grad_helper(self,X,X2,target):
|
||||
"""Return shape is NxMx(Ntheta)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = np.column_stack([f(X) for f in self.F])
|
||||
|
|
|
|||
|
|
@ -31,10 +31,10 @@ class Fixed(Kernpart):
|
|||
def K(self, X, X2, target):
|
||||
target += self.variance * self.fixed_K
|
||||
|
||||
def dK_dtheta(self, partial, X, X2, target):
|
||||
def _param_grad_helper(self, partial, X, X2, target):
|
||||
target += (partial * self.fixed_K).sum()
|
||||
|
||||
def dK_dX(self, partial, X, X2, target):
|
||||
def gradients_X(self, partial, X, X2, target):
|
||||
pass
|
||||
|
||||
def dKdiag_dX(self, partial, X, target):
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ class Gibbs(Kernpart):
|
|||
"""Compute the diagonal of the covariance matrix for X."""
|
||||
np.add(target, self.variance, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
self._K_computations(X, X2)
|
||||
self._dK_computations(dL_dK)
|
||||
|
|
@ -97,7 +97,7 @@ class Gibbs(Kernpart):
|
|||
|
||||
target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance matrix with respect to X."""
|
||||
# First account for gradients arising from presence of X in exponent.
|
||||
self._K_computations(X, X2)
|
||||
|
|
@ -105,8 +105,8 @@ class Gibbs(Kernpart):
|
|||
_K_dist = 2*(X[:, None, :] - X[None, :, :])
|
||||
else:
|
||||
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
|
||||
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
|
||||
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
|
||||
gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
|
||||
target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
|
||||
# Now account for gradients arising from presence of X in lengthscale.
|
||||
self._dK_computations(dL_dK)
|
||||
if X2 is None:
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ class Hetero(Kernpart):
|
|||
"""Helper function for computing the diagonal elements of the covariance."""
|
||||
return self.mapping.f(X).flatten()**2
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
if (X2 is None) or (X2 is X):
|
||||
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
|
||||
|
|
@ -90,7 +90,7 @@ class Hetero(Kernpart):
|
|||
"""Gradient of diagonal of covariance with respect to parameters."""
|
||||
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance matrix with respect to X."""
|
||||
if X2==None or X2 is X:
|
||||
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
|
||||
|
|
|
|||
|
|
@ -50,19 +50,19 @@ class Hierarchical(Kernpart):
|
|||
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
#[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
X, X2, slices, slices2 = self._sort_slices(X,X2)
|
||||
[[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
|
||||
[[[[k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
|
||||
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
raise NotImplementedError
|
||||
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
#if X2 is None:
|
||||
#X2,slices2 = X,slices
|
||||
#else:
|
||||
#X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
#[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
#[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
#
|
||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
||||
raise NotImplementedError
|
||||
|
|
|
|||
|
|
@ -70,22 +70,22 @@ class IndependentOutputs(Kernpart):
|
|||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
X2,slices2 = X,slices
|
||||
else:
|
||||
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
[[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
X2,slices2 = X,slices
|
||||
else:
|
||||
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
|
||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ class Kernpart(Parameterized):
|
|||
# the number of optimisable parameters
|
||||
# the name of the covariance function.
|
||||
# link to parameterized objects
|
||||
self._parameters_ = []
|
||||
#self._X = None
|
||||
|
||||
def connect_input(self, X):
|
||||
|
|
@ -76,14 +75,14 @@ class Kernpart(Parameterized):
|
|||
raise NotImplementedError
|
||||
def Kdiag(self,X,target):
|
||||
raise NotImplementedError
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
raise NotImplementedError
|
||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||
# In the base case compute this by calling dK_dtheta. Need to
|
||||
# In the base case compute this by calling _param_grad_helper. Need to
|
||||
# override for stationary covariances (for example) to save
|
||||
# time.
|
||||
for i in range(X.shape[0]):
|
||||
self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
|
||||
self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
|
||||
def psi0(self,Z,mu,S,target):
|
||||
raise NotImplementedError
|
||||
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
|
||||
|
|
@ -106,12 +105,19 @@ class Kernpart(Parameterized):
|
|||
raise NotImplementedError
|
||||
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
|
||||
raise NotImplementedError
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
raise NotImplementedError
|
||||
def dKdiag_dX(self, dL_dK, X, target):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def update_gradients_full(self, dL_dK, X):
|
||||
"""Set the gradients of all parameters when doing full (N) inference."""
|
||||
raise NotImplementedError
|
||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||
"""Set the gradients of all parameters when doing sparse (M) inference."""
|
||||
raise NotImplementedError
|
||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
|
||||
raise NotImplementedError
|
||||
|
||||
class Kernpart_stationary(Kernpart):
|
||||
def __init__(self, input_dim, lengthscale=None, ARD=False):
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from kernpart import Kernpart
|
|||
from ...util.linalg import tdot
|
||||
from ...util.misc import fast_array_equal, param_to_array
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
||||
class Linear(Kernpart):
|
||||
"""
|
||||
|
|
@ -43,8 +44,9 @@ class Linear(Kernpart):
|
|||
else:
|
||||
variances = np.ones(self.input_dim)
|
||||
|
||||
self.variances = Param('variances', variances)
|
||||
self.add_parameters(self.variances)
|
||||
self.variances = Param('variances', variances, Logexp())
|
||||
self.variances.gradient = np.zeros(self.variances.shape)
|
||||
self.add_parameter(self.variances)
|
||||
self.variances.add_observer(self, self.update_variance)
|
||||
|
||||
# initialize cache
|
||||
|
|
@ -57,21 +59,35 @@ class Linear(Kernpart):
|
|||
def on_input_change(self, X):
|
||||
self._K_computations(X, None)
|
||||
|
||||
# def _get_params(self):
|
||||
# return self.variances
|
||||
#
|
||||
# def _set_params(self, x):
|
||||
# assert x.size == (self.num_params)
|
||||
# self.variances = x
|
||||
#def parameters_changed(self):
|
||||
# self.variances2 = np.square(self.variances)
|
||||
#
|
||||
# def _get_param_names(self):
|
||||
# if self.num_params == 1:
|
||||
# return ['variance']
|
||||
# else:
|
||||
# return ['variance_%i' % i for i in range(self.variances.size)]
|
||||
|
||||
def update_gradients_full(self, dL_dK, X):
|
||||
#self.variances.gradient[:] = 0
|
||||
self._param_grad_helper(dL_dK, X, self.variances.gradient)
|
||||
|
||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||
tmp = dL_dKdiag[:, None] * X ** 2
|
||||
if self.ARD:
|
||||
self.variances.gradient = tmp.sum(0)
|
||||
else:
|
||||
self.variances.gradient = tmp.sum()
|
||||
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
|
||||
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
|
||||
|
||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||
self._psi_computations(Z, mu, S)
|
||||
# psi0:
|
||||
tmp = dL_dpsi0[:, None] * self.mu2_S
|
||||
if self.ARD: self.variances.gradient[:] = tmp.sum(0)
|
||||
else: self.variances.gradient[:] = tmp.sum()
|
||||
#psi1
|
||||
self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient)
|
||||
#psi2
|
||||
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
|
||||
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
|
||||
else: self.variances.gradient += tmp.sum()
|
||||
#from Kmm
|
||||
self._K_computations(Z, None)
|
||||
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
|
||||
|
||||
def K(self, X, X2, target):
|
||||
if self.ARD:
|
||||
XX = X * np.sqrt(self.variances)
|
||||
|
|
@ -88,7 +104,7 @@ class Linear(Kernpart):
|
|||
def Kdiag(self, X, target):
|
||||
np.add(target, np.sum(self.variances * np.square(X), -1), target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
if self.ARD:
|
||||
if X2 is None:
|
||||
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
|
||||
|
|
@ -100,14 +116,7 @@ class Linear(Kernpart):
|
|||
self._K_computations(X, X2)
|
||||
target += np.sum(self._dot_product * dL_dK)
|
||||
|
||||
def dKdiag_dtheta(self, dL_dKdiag, X, target):
|
||||
tmp = dL_dKdiag[:, None] * X ** 2
|
||||
if self.ARD:
|
||||
target += tmp.sum(0)
|
||||
else:
|
||||
target += tmp.sum()
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
if X2 is None:
|
||||
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||
else:
|
||||
|
|
@ -124,14 +133,6 @@ class Linear(Kernpart):
|
|||
self._psi_computations(Z, mu, S)
|
||||
target += np.sum(self.variances * self.mu2_S, 1)
|
||||
|
||||
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
tmp = dL_dpsi0[:, None] * self.mu2_S
|
||||
if self.ARD:
|
||||
target += tmp.sum(0)
|
||||
else:
|
||||
target += tmp.sum()
|
||||
|
||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
||||
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
|
||||
target_S += dL_dpsi0[:, None] * self.variances
|
||||
|
|
@ -140,17 +141,13 @@ class Linear(Kernpart):
|
|||
"""the variance, it does nothing"""
|
||||
self._psi1 = self.K(mu, Z, target)
|
||||
|
||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
|
||||
"""the variance, it does nothing"""
|
||||
self.dK_dtheta(dL_dpsi1, mu, Z, target)
|
||||
|
||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
||||
"""Do nothing for S, it does not affect psi1"""
|
||||
self._psi_computations(Z, mu, S)
|
||||
target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
|
||||
|
||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
|
||||
self.dK_dX(dL_dpsi1.T, Z, mu, target)
|
||||
self.gradients_X(dL_dpsi1.T, Z, mu, target)
|
||||
|
||||
def psi2(self, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
|
|
@ -164,25 +161,17 @@ class Linear(Kernpart):
|
|||
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
|
||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
||||
self.K(mu,Z,tmp)
|
||||
self.dK_dtheta(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
|
||||
self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
|
||||
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
|
||||
if self.ARD:
|
||||
target += result.sum(0).sum(0).sum(0)
|
||||
else:
|
||||
target += result.sum()
|
||||
|
||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
|
||||
if self.ARD:
|
||||
target += tmp.sum(0).sum(0).sum(0)
|
||||
else:
|
||||
target += tmp.sum()
|
||||
|
||||
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
||||
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
||||
self.K(mu,Z,tmp)
|
||||
self.dK_dX(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
|
||||
self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
|
||||
|
||||
Zs = Z*self.variances
|
||||
Zs_sq = Zs[:,None,:]*Zs[None,:,:]
|
||||
|
|
@ -288,11 +277,11 @@ class Linear(Kernpart):
|
|||
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
|
||||
self._X = X.copy()
|
||||
if X2 is None:
|
||||
self._dot_product = tdot(X)
|
||||
self._dot_product = tdot(param_to_array(X))
|
||||
self._X2 = None
|
||||
else:
|
||||
self._X2 = X2.copy()
|
||||
self._dot_product = np.dot(X, X2.T)
|
||||
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
|
||||
|
||||
def _psi_computations(self, Z, mu, S):
|
||||
# here are the "statistics" for psi1 and psi2
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ class MLP(Kernpart):
|
|||
self._K_diag_computations(X)
|
||||
target+= self.variance*self._K_diag_dvar
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
self._K_computations(X, X2)
|
||||
denom3 = self._K_denom*self._K_denom*self._K_denom
|
||||
|
|
@ -107,7 +107,7 @@ class MLP(Kernpart):
|
|||
|
||||
target[0] += np.sum(self._K_dvar*dL_dK)
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance matrix with respect to X"""
|
||||
self._K_computations(X, X2)
|
||||
arg = self._K_asin_arg
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ class PeriodicMatern32(Kernpart):
|
|||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||
|
||||
@silence_errors
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ class PeriodicMatern52(Kernpart):
|
|||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||
|
||||
@silence_errors
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from kernpart import Kernpart
|
|||
import numpy as np
|
||||
from GPy.util.linalg import mdot
|
||||
from GPy.util.decorators import silence_errors
|
||||
from GPy.core.parameterization.param import Param
|
||||
|
||||
class PeriodicExponential(Kernpart):
|
||||
"""
|
||||
|
|
@ -25,9 +26,9 @@ class PeriodicExponential(Kernpart):
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
|
||||
def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi, name='periodic_exp'):
|
||||
super(PeriodicExponential, self).__init__(input_dim, name)
|
||||
assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
|
||||
self.name = 'periodic_exp'
|
||||
self.input_dim = input_dim
|
||||
if lengthscale is not None:
|
||||
lengthscale = np.asarray(lengthscale)
|
||||
|
|
@ -38,7 +39,11 @@ class PeriodicExponential(Kernpart):
|
|||
self.num_params = 3
|
||||
self.n_freq = n_freq
|
||||
self.n_basis = 2*n_freq
|
||||
self._set_params(np.hstack((variance,lengthscale,period)))
|
||||
self.variance = Param('variance', variance)
|
||||
self.lengthscale = Param('lengthscale', lengthscale)
|
||||
self.period = Param('period', period)
|
||||
self.parameters_changed()
|
||||
#self._set_params(np.hstack((variance,lengthscale,period)))
|
||||
|
||||
def _cos(self,alpha,omega,phase):
|
||||
def f(x):
|
||||
|
|
@ -61,30 +66,25 @@ class PeriodicExponential(Kernpart):
|
|||
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
|
||||
return Gint
|
||||
|
||||
def _get_params(self):
|
||||
"""return the value of the parameters."""
|
||||
return np.hstack((self.variance,self.lengthscale,self.period))
|
||||
#def _get_params(self):
|
||||
# """return the value of the parameters."""
|
||||
# return np.hstack((self.variance,self.lengthscale,self.period))
|
||||
|
||||
def _set_params(self,x):
|
||||
def parameters_changed(self):
|
||||
"""set the value of the parameters."""
|
||||
assert x.size==3
|
||||
self.variance = x[0]
|
||||
self.lengthscale = x[1]
|
||||
self.period = x[2]
|
||||
|
||||
self.a = [1./self.lengthscale, 1.]
|
||||
self.b = [1]
|
||||
|
||||
self.basis_alpha = np.ones((self.n_basis,))
|
||||
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
|
||||
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))[:,0]
|
||||
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
|
||||
|
||||
self.G = self.Gram_matrix()
|
||||
self.Gi = np.linalg.inv(self.G)
|
||||
|
||||
def _get_param_names(self):
|
||||
"""return parameter names."""
|
||||
return ['variance','lengthscale','period']
|
||||
#def _get_param_names(self):
|
||||
# """return parameter names."""
|
||||
# return ['variance','lengthscale','period']
|
||||
|
||||
def Gram_matrix(self):
|
||||
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
|
||||
|
|
@ -110,7 +110,7 @@ class PeriodicExponential(Kernpart):
|
|||
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
|
||||
|
||||
@silence_errors
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
|
||||
if X2 is None: X2 = X
|
||||
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ class POLY(Kernpart):
|
|||
self._K_diag_computations(X)
|
||||
target+= self.variance*self._K_diag_dvar
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
self._K_computations(X, X2)
|
||||
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
|
||||
|
|
@ -99,7 +99,7 @@ class POLY(Kernpart):
|
|||
target[2] += base_cov_grad.sum()
|
||||
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance matrix with respect to X"""
|
||||
self._K_computations(X, X2)
|
||||
arg = self._K_poly_arg
|
||||
|
|
|
|||
|
|
@ -19,36 +19,21 @@ class Prod(Kernpart):
|
|||
"""
|
||||
def __init__(self,k1,k2,tensor=False,name="product"):
|
||||
if tensor:
|
||||
super(Prod, self).__init__(k1.input_dim + k2.input_dim, name)
|
||||
else:
|
||||
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to sum don't have the same dimension."
|
||||
super(Prod, self).__init__(k1.input_dim, name)
|
||||
#self.num_params = k1.num_params + k2.num_params
|
||||
self.k1 = k1
|
||||
self.k2 = k2
|
||||
if tensor:
|
||||
self.slice1 = slice(0,self.k1.input_dim)
|
||||
self.slice2 = slice(self.k1.input_dim,self.k1.input_dim+self.k2.input_dim)
|
||||
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
|
||||
self.slice1 = slice(0,k1.input_dim)
|
||||
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
|
||||
else:
|
||||
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
|
||||
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
|
||||
self.slice1 = slice(0,self.input_dim)
|
||||
self.slice2 = slice(0,self.input_dim)
|
||||
|
||||
self._X, self._X2 = np.empty(shape=(2,1))
|
||||
self.k1 = k1
|
||||
self.k2 = k2
|
||||
self.add_parameters(self.k1, self.k2)
|
||||
# self._set_params(np.hstack((k1._get_params(),k2._get_params())))
|
||||
|
||||
# def _get_params(self):
|
||||
# """return the value of the parameters."""
|
||||
# return np.hstack((self.k1._get_params(), self.k2._get_params()))
|
||||
#
|
||||
# def _set_params(self,x):
|
||||
# """set the value of the parameters."""
|
||||
# self.k1._set_params(x[:self.k1.num_params])
|
||||
# self.k2._set_params(x[self.k1.num_params:])
|
||||
#
|
||||
# def _get_param_names(self):
|
||||
# """return parameter names."""
|
||||
# return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
|
||||
#initialize cache
|
||||
self._X, self._X2 = np.empty(shape=(2,1))
|
||||
self._params = None
|
||||
|
||||
def K(self,X,X2,target):
|
||||
self._K_computations(X,X2)
|
||||
|
|
@ -64,15 +49,20 @@ class Prod(Kernpart):
|
|||
self._K_computations(X, X2)
|
||||
return self._K2
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def update_gradients_full(self, dL_dK, X):
|
||||
self._K_computations(X, None)
|
||||
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
|
||||
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
|
||||
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""Derivative of the covariance matrix with respect to the parameters."""
|
||||
self._K_computations(X,X2)
|
||||
if X2 is None:
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
|
||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
|
||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
|
||||
else:
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
|
||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
|
||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
|
|
@ -82,6 +72,7 @@ class Prod(Kernpart):
|
|||
self.k2.Kdiag(X[:,self.slice2],target2)
|
||||
target += target1 * target2
|
||||
|
||||
|
||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||
K1 = np.zeros(X.shape[0])
|
||||
K2 = np.zeros(X.shape[0])
|
||||
|
|
@ -90,21 +81,21 @@ class Prod(Kernpart):
|
|||
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
|
||||
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
self._K_computations(X,X2)
|
||||
if X2 is None:
|
||||
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
|
||||
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
|
||||
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
|
||||
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
|
||||
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
|
||||
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
|
||||
#NOTE The indices column in the inputs makes the ki.dK_dX fail when passing None instead of X[:,self.slicei]
|
||||
#NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
|
||||
X2 = X
|
||||
self.k1.dK_dX(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
|
||||
self.k2.dK_dX(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
|
||||
self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
|
||||
self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
|
||||
else:
|
||||
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
|
||||
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
|
||||
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
|
||||
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
K1 = np.zeros(X.shape[0])
|
||||
|
|
@ -112,8 +103,8 @@ class Prod(Kernpart):
|
|||
self.k1.Kdiag(X[:,self.slice1],K1)
|
||||
self.k2.Kdiag(X[:,self.slice2],K2)
|
||||
|
||||
self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
|
||||
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
|
||||
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
|
||||
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
|
||||
|
||||
def _K_computations(self,X,X2):
|
||||
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
|
||||
|
|
|
|||
|
|
@ -41,15 +41,15 @@ class prod_orthogonal(Kernpart):
|
|||
self._K_computations(X,X2)
|
||||
target += self._K1 * self._K2
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
self._K_computations(X,X2)
|
||||
if X2 is None:
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
|
||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
|
||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
|
||||
else:
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
|
||||
self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
|
||||
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
|
|
@ -67,11 +67,11 @@ class prod_orthogonal(Kernpart):
|
|||
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
|
||||
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
self._K_computations(X,X2)
|
||||
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
|
||||
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)
|
||||
self.k1.gradients_X(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
|
||||
self.k2.gradients_X(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
K1 = np.zeros(X.shape[0])
|
||||
|
|
@ -79,8 +79,8 @@ class prod_orthogonal(Kernpart):
|
|||
self.k1.Kdiag(X[:,0:self.k1.input_dim],K1)
|
||||
self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
|
||||
|
||||
self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
|
||||
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)
|
||||
self.k1.gradients_X(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
|
||||
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)
|
||||
|
||||
def _K_computations(self,X,X2):
|
||||
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class RationalQuadratic(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
target += self.variance
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
if X2 is None: X2 = X
|
||||
dist2 = np.square((X-X2.T)/self.lengthscale)
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ class RationalQuadratic(Kernpart):
|
|||
target[0] += np.sum(dL_dKdiag)
|
||||
# here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
if X2 is None:
|
||||
dist2 = np.square((X-X.T)/self.lengthscale)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from kernpart import Kernpart
|
|||
from ...util.linalg import tdot
|
||||
from ...util.misc import fast_array_equal, param_to_array
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
||||
class RBF(Kernpart):
|
||||
"""
|
||||
|
|
@ -50,9 +51,12 @@ class RBF(Kernpart):
|
|||
else:
|
||||
lengthscale = np.ones(self.input_dim)
|
||||
|
||||
self.variance = Param('variance', variance)
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
|
||||
self.lengthscale = Param('lengthscale', lengthscale)
|
||||
self.lengthscale.add_observer(self, self.update_lengthscale)
|
||||
self.update_lengthscale(self.lengthscale)
|
||||
|
||||
self.add_parameters(self.variance, self.lengthscale)
|
||||
self.parameters_changed() # initializes cache
|
||||
|
||||
|
|
@ -114,16 +118,16 @@ class RBF(Kernpart):
|
|||
self._K_computations(X, Z)
|
||||
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
|
||||
if self.ARD:
|
||||
self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
|
||||
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
|
||||
|
||||
else:
|
||||
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
||||
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
|
||||
|
||||
#from Kmm
|
||||
self._K_computations(Z, None)
|
||||
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
|
||||
if self.ARD:
|
||||
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
||||
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
||||
else:
|
||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
||||
|
||||
|
|
@ -138,7 +142,7 @@ class RBF(Kernpart):
|
|||
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
|
||||
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
||||
if not self.ARD:
|
||||
self.lengthscale.gradeint = dpsi1_dlength.sum()
|
||||
self.lengthscale.gradient = dpsi1_dlength.sum()
|
||||
else:
|
||||
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
|
||||
|
||||
|
|
@ -157,9 +161,9 @@ class RBF(Kernpart):
|
|||
self._K_computations(Z, None)
|
||||
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
|
||||
if self.ARD:
|
||||
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
||||
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
||||
else:
|
||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
|
||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
||||
|
|
@ -168,8 +172,8 @@ class RBF(Kernpart):
|
|||
_K_dist = 2*(X[:, None, :] - X[None, :, :])
|
||||
else:
|
||||
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
|
||||
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
||||
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
|
||||
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ class RBFInv(RBF):
|
|||
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
|
||||
|
||||
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
self._K_computations(X, X2)
|
||||
target[0] += np.sum(self._K_dvar * dL_dK)
|
||||
if self.ARD:
|
||||
|
|
@ -142,14 +142,14 @@ class RBFInv(RBF):
|
|||
else:
|
||||
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
self._K_computations(X, X2)
|
||||
if X2 is None:
|
||||
_K_dist = 2*(X[:, None, :] - X[None, :, :])
|
||||
else:
|
||||
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
|
||||
dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
||||
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
|
||||
gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ class RBFCos(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
np.add(target,self.variance,target)
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
self._K_computations(X,X2)
|
||||
target[0] += np.sum(dL_dK*self._dvar)
|
||||
if self.ARD:
|
||||
|
|
@ -88,7 +88,7 @@ class RBFCos(Kernpart):
|
|||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||
target[0] += np.sum(dL_dKdiag)
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
#TODO!!!
|
||||
raise NotImplementedError
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class Spline(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
target += self.variance*X.flatten()**3/3.
|
||||
|
||||
def dK_dtheta(self,X,X2,target):
|
||||
def _param_grad_helper(self,X,X2,target):
|
||||
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
|
||||
|
||||
def dKdiag_dtheta(self,X,target):
|
||||
|
|
|
|||
352
GPy/kern/parts/ss_rbf.py
Normal file
352
GPy/kern/parts/ss_rbf.py
Normal file
|
|
@ -0,0 +1,352 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
import numpy as np
|
||||
from kernpart import Kernpart
|
||||
from ...util.linalg import tdot
|
||||
from ...util.misc import fast_array_equal, param_to_array
|
||||
from ...core.parameterization import Param
|
||||
|
||||
class SS_RBF(Kernpart):
|
||||
"""
|
||||
The RBF kernel for Spike-and-Slab GPLVM
|
||||
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
|
||||
|
||||
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance of the kernel
|
||||
:type variance: float
|
||||
:param lengthscale: the vector of lengthscale of the kernel
|
||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
||||
:rtype: kernel object
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, name='rbf'):
|
||||
super(RBF, self).__init__(input_dim, name)
|
||||
self.input_dim = input_dim
|
||||
|
||||
if lengthscale is not None:
|
||||
lengthscale = np.asarray(lengthscale)
|
||||
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
|
||||
else:
|
||||
lengthscale = np.ones(self.input_dim)
|
||||
|
||||
self.variance = Param('variance', variance)
|
||||
self.lengthscale = Param('lengthscale', lengthscale)
|
||||
self.lengthscale.add_observer(self, self.update_lengthscale)
|
||||
self.add_parameters(self.variance, self.lengthscale)
|
||||
self.parameters_changed() # initializes cache
|
||||
|
||||
def on_input_change(self, X):
|
||||
#self._K_computations(X, None)
|
||||
pass
|
||||
|
||||
def update_lengthscale(self, l):
|
||||
self.lengthscale2 = np.square(self.lengthscale)
|
||||
|
||||
def parameters_changed(self):
|
||||
# reset cached results
|
||||
self._X, self._X2 = np.empty(shape=(2, 1))
|
||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
|
||||
|
||||
def K(self, X, X2, target):
|
||||
self._K_computations(X, X2)
|
||||
target += self.variance * self._K_dvar
|
||||
|
||||
def Kdiag(self, X, target):
|
||||
np.add(target, self.variance, target)
|
||||
|
||||
def psi0(self, Z, mu, S, target):
|
||||
target += self.variance
|
||||
|
||||
def psi1(self, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
target += self._psi1
|
||||
|
||||
def psi2(self, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
target += self._psi2
|
||||
|
||||
def update_gradients_full(self, dL_dK, X):
|
||||
self._K_computations(X, None)
|
||||
self.variance.gradient = np.sum(self._K_dvar * dL_dK)
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None)
|
||||
else:
|
||||
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
|
||||
|
||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||
#contributions from Kdiag
|
||||
self.variance.gradient = np.sum(dL_dKdiag)
|
||||
|
||||
#from Knm
|
||||
self._K_computations(X, Z)
|
||||
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
|
||||
if self.ARD:
|
||||
self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
|
||||
|
||||
else:
|
||||
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
||||
|
||||
#from Kmm
|
||||
self._K_computations(Z, None)
|
||||
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
|
||||
if self.ARD:
|
||||
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
||||
else:
|
||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
||||
|
||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||
self._psi_computations(Z, mu, S)
|
||||
|
||||
#contributions from psi0:
|
||||
self.variance.gradient = np.sum(dL_dpsi0)
|
||||
|
||||
#from psi1
|
||||
self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance)
|
||||
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
|
||||
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
||||
if not self.ARD:
|
||||
self.lengthscale.gradeint = dpsi1_dlength.sum()
|
||||
else:
|
||||
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
|
||||
|
||||
#from psi2
|
||||
d_var = 2.*self._psi2 / self.variance
|
||||
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
|
||||
|
||||
self.variance.gradient += np.sum(dL_dpsi2 * d_var)
|
||||
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
|
||||
if not self.ARD:
|
||||
self.lengthscale.gradient += dpsi2_dlength.sum()
|
||||
else:
|
||||
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
|
||||
|
||||
#from Kmm
|
||||
self._K_computations(Z, None)
|
||||
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
|
||||
if self.ARD:
|
||||
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
||||
else:
|
||||
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
||||
self._K_computations(X, X2)
|
||||
if X2 is None:
|
||||
_K_dist = 2*(X[:, None, :] - X[None, :, :])
|
||||
else:
|
||||
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
|
||||
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
||||
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
pass
|
||||
|
||||
#---------------------------------------#
|
||||
# PSI statistics #
|
||||
#---------------------------------------#
|
||||
|
||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
||||
pass
|
||||
|
||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
denominator = (self.lengthscale2 * (self._psi1_denom))
|
||||
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
|
||||
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
|
||||
|
||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
||||
self._psi_computations(Z, mu, S)
|
||||
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
|
||||
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
|
||||
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
|
||||
|
||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
|
||||
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
|
||||
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
|
||||
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
|
||||
|
||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
||||
"""Think N,num_inducing,num_inducing,input_dim """
|
||||
self._psi_computations(Z, mu, S)
|
||||
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
|
||||
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
|
||||
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
|
||||
|
||||
#---------------------------------------#
|
||||
# Precomputations #
|
||||
#---------------------------------------#
|
||||
|
||||
def _K_computations(self, X, X2):
|
||||
#params = self._get_params()
|
||||
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)):
|
||||
#self._X = X.copy()
|
||||
#self._params_save = params.copy()
|
||||
if X2 is None:
|
||||
self._X2 = None
|
||||
X = X / self.lengthscale
|
||||
Xsquare = np.sum(np.square(X), 1)
|
||||
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
|
||||
else:
|
||||
self._X2 = X2.copy()
|
||||
X = X / self.lengthscale
|
||||
X2 = X2 / self.lengthscale
|
||||
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
|
||||
self._K_dvar = np.exp(-0.5 * self._K_dist2)
|
||||
|
||||
def _dL_dlengthscales_via_K(self, dL_dK, X, X2):
|
||||
"""
|
||||
A helper function for update_gradients_* methods
|
||||
|
||||
Computes the derivative of the objective L wrt the lengthscales via
|
||||
|
||||
dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl)
|
||||
|
||||
assumes self._K_computations has just been called.
|
||||
|
||||
This is only valid if self.ARD=True
|
||||
"""
|
||||
target = np.zeros(self.input_dim)
|
||||
dvardLdK = self._K_dvar * dL_dK
|
||||
var_len3 = self.variance / np.power(self.lengthscale, 3)
|
||||
if X2 is None:
|
||||
# save computation for the symmetrical case
|
||||
dvardLdK = dvardLdK + dvardLdK.T
|
||||
code = """
|
||||
int q,i,j;
|
||||
double tmp;
|
||||
for(q=0; q<input_dim; q++){
|
||||
tmp = 0;
|
||||
for(i=0; i<num_data; i++){
|
||||
for(j=0; j<i; j++){
|
||||
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
|
||||
}
|
||||
}
|
||||
target(q) += var_len3(q)*tmp;
|
||||
}
|
||||
"""
|
||||
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
|
||||
X, dvardLdK = param_to_array(X, dvardLdK)
|
||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
|
||||
else:
|
||||
code = """
|
||||
int q,i,j;
|
||||
double tmp;
|
||||
for(q=0; q<input_dim; q++){
|
||||
tmp = 0;
|
||||
for(i=0; i<num_data; i++){
|
||||
for(j=0; j<num_inducing; j++){
|
||||
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
|
||||
}
|
||||
}
|
||||
target(q) += var_len3(q)*tmp;
|
||||
}
|
||||
"""
|
||||
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
|
||||
X, X2, dvardLdK = param_to_array(X, X2, dvardLdK)
|
||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
|
||||
return target
|
||||
|
||||
|
||||
|
||||
def _psi_computations(self, Z, mu, S):
|
||||
# here are the "statistics" for psi1 and psi2
|
||||
Z_changed = not fast_array_equal(Z, self._Z)
|
||||
if Z_changed:
|
||||
# Z has changed, compute Z specific stuff
|
||||
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
||||
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
||||
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
|
||||
|
||||
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
|
||||
# something's changed. recompute EVERYTHING
|
||||
|
||||
# psi1
|
||||
self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
|
||||
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
|
||||
self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
|
||||
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
|
||||
self._psi1 = self.variance * np.exp(self._psi1_exponent)
|
||||
|
||||
# psi2
|
||||
self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
|
||||
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
|
||||
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
|
||||
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
|
||||
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
|
||||
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
|
||||
|
||||
# store matrices for caching
|
||||
self._Z, self._mu, self._S = Z, mu, S
|
||||
|
||||
def weave_psi2(self, mu, Zhat):
|
||||
N, input_dim = mu.shape
|
||||
num_inducing = Zhat.shape[0]
|
||||
|
||||
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
|
||||
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
|
||||
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
|
||||
psi2 = np.empty((N, num_inducing, num_inducing))
|
||||
|
||||
psi2_Zdist_sq = self._psi2_Zdist_sq
|
||||
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
|
||||
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
|
||||
variance_sq = float(np.square(self.variance))
|
||||
if self.ARD:
|
||||
lengthscale2 = self.lengthscale2
|
||||
else:
|
||||
lengthscale2 = np.ones(input_dim) * self.lengthscale2
|
||||
code = """
|
||||
double tmp;
|
||||
|
||||
#pragma omp parallel for private(tmp)
|
||||
for (int n=0; n<N; n++){
|
||||
for (int m=0; m<num_inducing; m++){
|
||||
for (int mm=0; mm<(m+1); mm++){
|
||||
for (int q=0; q<input_dim; q++){
|
||||
//compute mudist
|
||||
tmp = mu(n,q) - Zhat(m,mm,q);
|
||||
mudist(n,m,mm,q) = tmp;
|
||||
mudist(n,mm,m,q) = tmp;
|
||||
|
||||
//now mudist_sq
|
||||
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
|
||||
mudist_sq(n,m,mm,q) = tmp;
|
||||
mudist_sq(n,mm,m,q) = tmp;
|
||||
|
||||
//now psi2_exponent
|
||||
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
|
||||
psi2_exponent(n,mm,m) += tmp;
|
||||
if (m !=mm){
|
||||
psi2_exponent(n,m,mm) += tmp;
|
||||
}
|
||||
//psi2 would be computed like this, but np is faster
|
||||
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
|
||||
//psi2(n,m,mm) = tmp;
|
||||
//psi2(n,mm,m) = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
"""
|
||||
|
||||
support_code = """
|
||||
#include <omp.h>
|
||||
#include <math.h>
|
||||
"""
|
||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
||||
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
|
||||
type_converters=weave.converters.blitz, **self.weave_options)
|
||||
|
||||
return mudist, mudist_sq, psi2_exponent, psi2
|
||||
|
|
@ -40,7 +40,7 @@ class Symmetric(Kernpart):
|
|||
self.k.K(X,AX2,target)
|
||||
self.k.K(AX,AX2,target)
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
def _param_grad_helper(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
AX = np.dot(X,self.transform)
|
||||
if X2 is None:
|
||||
|
|
@ -48,13 +48,13 @@ class Symmetric(Kernpart):
|
|||
ZX2 = AX
|
||||
else:
|
||||
AX2 = np.dot(X2, self.transform)
|
||||
self.k.dK_dtheta(dL_dK,X,X2,target)
|
||||
self.k.dK_dtheta(dL_dK,AX,X2,target)
|
||||
self.k.dK_dtheta(dL_dK,X,AX2,target)
|
||||
self.k.dK_dtheta(dL_dK,AX,AX2,target)
|
||||
self.k._param_grad_helper(dL_dK,X,X2,target)
|
||||
self.k._param_grad_helper(dL_dK,AX,X2,target)
|
||||
self.k._param_grad_helper(dL_dK,X,AX2,target)
|
||||
self.k._param_grad_helper(dL_dK,AX,AX2,target)
|
||||
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
def gradients_X(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
AX = np.dot(X,self.transform)
|
||||
if X2 is None:
|
||||
|
|
@ -62,10 +62,10 @@ class Symmetric(Kernpart):
|
|||
ZX2 = AX
|
||||
else:
|
||||
AX2 = np.dot(X2, self.transform)
|
||||
self.k.dK_dX(dL_dK, X, X2, target)
|
||||
self.k.dK_dX(dL_dK, AX, X2, target)
|
||||
self.k.dK_dX(dL_dK, X, AX2, target)
|
||||
self.k.dK_dX(dL_dK, AX ,AX2, target)
|
||||
self.k.gradients_X(dL_dK, X, X2, target)
|
||||
self.k.gradients_X(dL_dK, AX, X2, target)
|
||||
self.k.gradients_X(dL_dK, X, AX2, target)
|
||||
self.k.gradients_X(dL_dK, AX ,AX2, target)
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
|
|
|
|||
|
|
@ -348,7 +348,7 @@ class spkern(Kernpart):
|
|||
def Kdiag(self,X,target):
|
||||
self._weave_inline(self._Kdiag_code, X, target)
|
||||
|
||||
def dK_dtheta(self,partial,X,Z,target):
|
||||
def _param_grad_helper(self,partial,X,Z,target):
|
||||
if Z is None:
|
||||
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
|
||||
else:
|
||||
|
|
@ -357,7 +357,7 @@ class spkern(Kernpart):
|
|||
def dKdiag_dtheta(self,partial,X,target):
|
||||
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
|
||||
|
||||
def dK_dX(self,partial,X,Z,target):
|
||||
def gradients_X(self,partial,X,Z,target):
|
||||
if Z is None:
|
||||
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
||||
class White(Kernpart):
|
||||
"""
|
||||
|
|
@ -17,7 +18,7 @@ class White(Kernpart):
|
|||
def __init__(self,input_dim,variance=1.):
|
||||
super(White, self).__init__(input_dim, 'white')
|
||||
self.input_dim = input_dim
|
||||
self.variance = Param('variance', variance)
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
self.add_parameters(self.variance)
|
||||
self._psi1 = 0 # TODO: more elegance here
|
||||
|
||||
|
|
@ -32,7 +33,7 @@ class White(Kernpart):
|
|||
self.variance.gradient = np.trace(dL_dK)
|
||||
|
||||
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||
raise NotImplementedError
|
||||
self.variance.gradient = np.trace(dL_dKmm) + np.sum(dL_dKdiag)
|
||||
|
||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
||||
raise NotImplementedError
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ class Bernoulli(Likelihood):
|
|||
|
||||
return Z_hat, mu_hat, sigma2_hat
|
||||
|
||||
def _predictive_mean_analytical(self, mu, variance):
|
||||
def predictive_mean(self, mu, variance):
|
||||
|
||||
if isinstance(self.gp_link, link_functions.Probit):
|
||||
return stats.norm.cdf(mu/np.sqrt(1+variance))
|
||||
|
|
@ -89,12 +89,13 @@ class Bernoulli(Likelihood):
|
|||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
def _predictive_variance_analytical(self, mu, variance, pred_mean):
|
||||
def predictive_variance(self, mu, variance, pred_mean):
|
||||
|
||||
if isinstance(self.gp_link, link_functions.Heaviside):
|
||||
return 0.
|
||||
else:
|
||||
raise NotImplementedError
|
||||
return np.nan
|
||||
#raise NotImplementedError
|
||||
|
||||
def pdf_link(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
#TODO
|
||||
"""
|
||||
A lot of this code assumes that the link functio nis the identity.
|
||||
A lot of this code assumes that the link function is the identity.
|
||||
|
||||
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
|
||||
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
|
||||
|
||||
Furthermore, exact Guassian inference can only be done for the identity link, so we should be asserting so for all calls which relate to that.
|
||||
|
||||
|
|
@ -18,6 +17,7 @@ from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
|||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
|
||||
class Gaussian(Likelihood):
|
||||
"""
|
||||
|
|
@ -43,7 +43,7 @@ class Gaussian(Likelihood):
|
|||
|
||||
super(Gaussian, self).__init__(gp_link, name=name)
|
||||
|
||||
self.variance = Param('variance', variance)
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
self.add_parameter(self.variance)
|
||||
|
||||
if isinstance(gp_link, link_functions.Identity):
|
||||
|
|
@ -130,7 +130,10 @@ class Gaussian(Likelihood):
|
|||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
|
||||
N = y.shape[0]
|
||||
ln_det_cov = N*np.log(self.variance)
|
||||
|
||||
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
|
|
@ -175,7 +178,8 @@ class Gaussian(Likelihood):
|
|||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
hess = -(1.0/self.variance)*np.ones((self.N, 1))
|
||||
N = y.shape[0]
|
||||
hess = -(1.0/self.variance)*np.ones((N, 1))
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
|
||||
|
|
@ -194,7 +198,8 @@ class Gaussian(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None]
|
||||
N = y.shape[0]
|
||||
d3logpdf_dlink3 = np.zeros((N,1))
|
||||
return d3logpdf_dlink3
|
||||
|
||||
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
|
||||
|
|
@ -215,7 +220,8 @@ class Gaussian(Likelihood):
|
|||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
e = y - link_f
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e))
|
||||
N = y.shape[0]
|
||||
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
|
||||
return np.sum(dlik_dsigma) # Sure about this sum?
|
||||
|
||||
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
|
||||
|
|
@ -255,7 +261,8 @@ class Gaussian(Likelihood):
|
|||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
|
||||
N = y.shape[0]
|
||||
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
|
||||
|
|
|
|||
|
|
@ -13,12 +13,12 @@ from ..core.parameterization import Parameterized
|
|||
|
||||
class Likelihood(Parameterized):
|
||||
"""
|
||||
Likelihood base class, used to defing p(y|f).
|
||||
Likelihood base class, used to defing p(y|f).
|
||||
|
||||
All instances use _inverse_ link functions, which can be swapped out. It is
|
||||
expected that inherriting classes define a default inverse link function
|
||||
|
||||
To use this class, inherrit and define missing functionality.
|
||||
To use this class, inherrit and define missing functionality.
|
||||
|
||||
Inherriting classes *must* implement:
|
||||
pdf_link : a bound method which turns the output of the link function into the pdf
|
||||
|
|
@ -27,7 +27,7 @@ class Likelihood(Parameterized):
|
|||
To enable use with EP, inherriting classes *must* define:
|
||||
TODO: a suitable derivative function for any parameters of the class
|
||||
It is also desirable to define:
|
||||
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
|
||||
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
|
||||
|
||||
To enable use with Laplace approximation, inherriting classes *must* define:
|
||||
Some derivative functions *AS TODO*
|
||||
|
|
@ -36,7 +36,7 @@ class Likelihood(Parameterized):
|
|||
|
||||
"""
|
||||
def __init__(self, gp_link, name):
|
||||
super(Likelihood, self).__init__(name)
|
||||
super(Likelihood, self).__init__(name)
|
||||
assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
|
||||
self.gp_link = gp_link
|
||||
self.log_concave = False
|
||||
|
|
@ -44,6 +44,10 @@ class Likelihood(Parameterized):
|
|||
def _gradients(self,partial):
|
||||
return np.zeros(0)
|
||||
|
||||
def update_gradients(self, partial):
|
||||
if self.size > 0:
|
||||
raise NotImplementedError('Must be implemented for likelihoods with parameters to be optimized')
|
||||
|
||||
def _preprocess_values(self,Y):
|
||||
"""
|
||||
In case it is needed, this function assess the output values or makes any pertinent transformation on them.
|
||||
|
|
@ -303,31 +307,31 @@ class Likelihood(Parameterized):
|
|||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if len(self._get_param_names()) > 0:
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
return np.empty([1, 0])
|
||||
return np.zeros([1, 0])
|
||||
|
||||
def dlogpdf_df_dtheta(self, f, y, extra_data=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if len(self._get_param_names()) > 0:
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
|
||||
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
return np.empty([f.shape[0], 0])
|
||||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if len(self._get_param_names()) > 0:
|
||||
if self.size > 0:
|
||||
link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
|
|
@ -336,7 +340,7 @@ class Likelihood(Parameterized):
|
|||
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
else:
|
||||
#Is no parameters so return an empty array for its derivatives
|
||||
return np.empty([f.shape[0], 0])
|
||||
return np.zeros([f.shape[0], 0])
|
||||
|
||||
def _laplace_gradients(self, f, y, extra_data=None):
|
||||
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
|
||||
|
|
@ -345,9 +349,12 @@ class Likelihood(Parameterized):
|
|||
|
||||
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
||||
# ensure we have gradients for every parameter we want to optimize
|
||||
assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
|
||||
assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
|
||||
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
|
||||
try:
|
||||
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
|
||||
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
|
||||
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
|
||||
except Exception as e:
|
||||
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
||||
|
||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||
|
||||
|
|
|
|||
|
|
@ -19,8 +19,11 @@ class Poisson(Likelihood):
|
|||
.. Note::
|
||||
Y is expected to take values in {0,1,2,...}
|
||||
"""
|
||||
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
|
||||
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||
def __init__(self, gp_link=None):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Log_ex_1()
|
||||
|
||||
super(Poisson, self).__init__(gp_link, name='Poisson')
|
||||
|
||||
def _preprocess_values(self,Y):
|
||||
return Y
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import link_functions
|
|||
from scipy import stats, integrate
|
||||
from scipy.special import gammaln, gamma
|
||||
from likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
|
||||
class StudentT(Likelihood):
|
||||
"""
|
||||
|
|
@ -19,26 +20,30 @@ class StudentT(Likelihood):
|
|||
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
|
||||
|
||||
"""
|
||||
def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2):
|
||||
self.v = deg_free
|
||||
self.sigma2 = sigma2
|
||||
def __init__(self,gp_link=None, deg_free=5, sigma2=2):
|
||||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
|
||||
super(StudentT, self).__init__(gp_link, name='Student_T')
|
||||
|
||||
self.sigma2 = Param('t_noise', float(sigma2))
|
||||
self.v = Param('deg_free', float(deg_free))
|
||||
self.add_parameter(self.sigma2)
|
||||
self.add_parameter(self.v)
|
||||
self.v.constrain_fixed()
|
||||
|
||||
self._set_params(np.asarray(sigma2))
|
||||
super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance)
|
||||
self.log_concave = False
|
||||
|
||||
def _get_params(self):
|
||||
return np.asarray(self.sigma2)
|
||||
def parameters_changed(self):
|
||||
self.variance = (self.v / float(self.v - 2)) * self.sigma2
|
||||
|
||||
def _get_param_names(self):
|
||||
return ["t_noise_std2"]
|
||||
|
||||
def _set_params(self, x):
|
||||
self.sigma2 = float(x)
|
||||
|
||||
@property
|
||||
def variance(self, extra_data=None):
|
||||
return (self.v / float(self.v - 2)) * self.sigma2
|
||||
def update_gradients(self, partial):
|
||||
"""
|
||||
Pull out the gradients, be careful as the order must match the order
|
||||
in which the parameters are added
|
||||
"""
|
||||
self.sigma2.gradient = partial[0]
|
||||
self.v.gradient = partial[1]
|
||||
|
||||
def pdf_link(self, link_f, y, extra_data=None):
|
||||
"""
|
||||
|
|
@ -82,10 +87,14 @@ class StudentT(Likelihood):
|
|||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - link_f
|
||||
#FIXME:
|
||||
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly
|
||||
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
objective = (+ gammaln((self.v + 1) * 0.5)
|
||||
- gammaln(self.v * 0.5)
|
||||
- 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
- gammaln(self.v * 0.5)
|
||||
- 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
)
|
||||
return np.sum(objective)
|
||||
|
||||
|
|
@ -222,17 +231,20 @@ class StudentT(Likelihood):
|
|||
|
||||
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
|
||||
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
|
||||
return np.asarray([[dlogpdf_dvar]])
|
||||
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
|
||||
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
|
||||
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
|
||||
return dlogpdf_dlink_dvar
|
||||
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
|
||||
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
|
||||
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
|
||||
return d2logpdf_dlink2_dvar
|
||||
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
|
||||
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
||||
|
||||
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
|
||||
def predictive_variance(self, mu, sigma, predictive_mean=None):
|
||||
"""
|
||||
Compute predictive variance of student_t*normal p(y*|f*)p(f*)
|
||||
|
||||
|
|
@ -252,7 +264,7 @@ class StudentT(Likelihood):
|
|||
|
||||
return true_var
|
||||
|
||||
def _predictive_mean_analytical(self, mu, sigma):
|
||||
def predictive_mean(self, mu, sigma):
|
||||
"""
|
||||
Compute mean of the prediction
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -57,4 +57,4 @@ class Kernel(Mapping):
|
|||
return np.hstack((self._df_dA.flatten(), self._df_dbias))
|
||||
|
||||
def df_dX(self, dL_df, X):
|
||||
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
|
||||
return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
from gp_regression import GPRegression
|
||||
from gp_classification import GPClassification
|
||||
from sparse_gp_regression import SparseGPRegression
|
||||
from sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput
|
||||
from svigp_regression import SVIGPRegression
|
||||
from sparse_gp_classification import SparseGPClassification
|
||||
from gplvm import GPLVM
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
import itertools
|
||||
from gplvm import GPLVM
|
||||
from .. import kern
|
||||
from ..core import SparseGP
|
||||
|
|
@ -23,15 +22,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
|||
:type init: 'PCA'|'random'
|
||||
|
||||
"""
|
||||
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, name='bayesian gplvm', **kwargs):
|
||||
if type(likelihood_or_Y) is np.ndarray:
|
||||
likelihood = Gaussian(likelihood_or_Y)
|
||||
else:
|
||||
likelihood = likelihood_or_Y
|
||||
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=Gaussian(), name='bayesian gplvm', **kwargs):
|
||||
if X == None:
|
||||
X = self.initialise_latent(init, input_dim, likelihood.Y)
|
||||
X = self.initialise_latent(init, input_dim, Y)
|
||||
self.init = init
|
||||
|
||||
if X_variance is None:
|
||||
|
|
@ -44,10 +38,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
|||
if kernel is None:
|
||||
kernel = kern.rbf(input_dim) # + kern.white(input_dim)
|
||||
|
||||
SparseGP.__init__(self, X=X, likelihood=likelihood, kernel=kernel, Z=Z, X_variance=X_variance, name=name, **kwargs)
|
||||
self.q = Normal(self.X, self.X_variance)
|
||||
self.add_parameter(self.q, gradient=self._dbound_dmuS, index=0)
|
||||
self.ensure_default_constraints()
|
||||
self.q = Normal(X, X_variance)
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
|
||||
self.add_parameter(self.q, index=0)
|
||||
#self.ensure_default_constraints()
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
|
|
@ -61,42 +55,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
|||
self.init = state.pop()
|
||||
SparseGP._setstate(self, state)
|
||||
|
||||
# def _get_param_names(self):
|
||||
# X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
||||
# S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
|
||||
# return (X_names + S_names + SparseGP._get_param_names(self))
|
||||
|
||||
#def _get_print_names(self):
|
||||
# return SparseGP._get_print_names(self)
|
||||
|
||||
# def _get_params(self):
|
||||
# """
|
||||
# Horizontally stacks the parameters in order to present them to the optimizer.
|
||||
# The resulting 1-input_dim array has this structure:
|
||||
#
|
||||
# ===============================================================
|
||||
# | mu | S | Z | theta | beta |
|
||||
# ===============================================================
|
||||
#
|
||||
# """
|
||||
# x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
|
||||
# return x
|
||||
|
||||
# def _set_params(self, x, save_old=True, save_count=0):
|
||||
# N, input_dim = self.num_data, self.input_dim
|
||||
# self.X = x[:self.X.size].reshape(N, input_dim).copy()
|
||||
# self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
|
||||
# SparseGP._set_params(self, x[(2 * N * input_dim):])
|
||||
|
||||
def dKL_dmuS(self):
|
||||
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
|
||||
dKL_dmu = self.X
|
||||
return dKL_dmu, dKL_dS
|
||||
|
||||
def dL_dmuS(self):
|
||||
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
|
||||
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
|
||||
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
|
||||
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
|
||||
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
|
||||
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
|
||||
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
|
||||
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
|
||||
|
||||
|
|
@ -107,31 +69,25 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
|||
var_S = np.sum(self.X_variance - np.log(self.X_variance))
|
||||
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
|
||||
|
||||
def log_likelihood(self):
|
||||
ll = SparseGP.log_likelihood(self)
|
||||
kl = self.KL_divergence()
|
||||
return ll - kl
|
||||
def parameters_changed(self):
|
||||
super(BayesianGPLVM, self).parameters_changed()
|
||||
self._log_marginal_likelihood -= self.KL_divergence()
|
||||
|
||||
def _dbound_dmuS(self):
|
||||
dKL_dmu, dKL_dS = self.dKL_dmuS()
|
||||
dL_dmu, dL_dS = self.dL_dmuS()
|
||||
d_dmu = (dL_dmu - dKL_dmu).flatten()
|
||||
d_dS = (dL_dS - dKL_dS).flatten()
|
||||
return np.hstack((d_dmu, d_dS))
|
||||
|
||||
# def _log_likelihood_gradients(self):
|
||||
# dKL_dmu, dKL_dS = self.dKL_dmuS()
|
||||
# dL_dmu, dL_dS = self.dL_dmuS()
|
||||
# d_dmu = (dL_dmu - dKL_dmu).flatten()
|
||||
# d_dS = (dL_dS - dKL_dS).flatten()
|
||||
# self.dbound_dmuS = np.hstack((d_dmu, d_dS))
|
||||
# self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
|
||||
# return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
|
||||
# dL:
|
||||
self.q.mean.gradient = dL_dmu
|
||||
self.q.variance.gradient = dL_dS
|
||||
|
||||
# dKL:
|
||||
self.q.mean.gradient -= self.X
|
||||
self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5
|
||||
|
||||
def plot_latent(self, plot_inducing=True, *args, **kwargs):
|
||||
"""
|
||||
See GPy.plotting.matplot_dep.dim_reduction_plots.plot_latent
|
||||
"""
|
||||
import sys
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import dim_reduction_plots
|
||||
|
||||
|
|
@ -181,18 +137,18 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
|||
"""
|
||||
dmu_dX = np.zeros_like(Xnew)
|
||||
for i in range(self.Z.shape[0]):
|
||||
dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
|
||||
dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
|
||||
return dmu_dX
|
||||
|
||||
def dmu_dXnew(self, Xnew):
|
||||
"""
|
||||
Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
|
||||
"""
|
||||
dK_dX = np.zeros((Xnew.shape[0], self.num_inducing))
|
||||
gradients_X = np.zeros((Xnew.shape[0], self.num_inducing))
|
||||
ones = np.ones((1, 1))
|
||||
for i in range(self.Z.shape[0]):
|
||||
dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
|
||||
return np.dot(dK_dX, self.Cpsi1Vf)
|
||||
gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
|
||||
return np.dot(gradients_X, self.Cpsi1Vf)
|
||||
|
||||
def plot_steepest_gradient_map(self, *args, ** kwargs):
|
||||
"""
|
||||
|
|
@ -201,7 +157,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
|
|||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import dim_reduction_plots
|
||||
|
||||
return dim_reduction_plots.plot_steepest_gradient_map(model,*args,**kwargs)
|
||||
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
|
||||
|
||||
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
|
|||
GP._set_params(self, x[self.mapping.num_params:])
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
dL_df = self.kern.dK_dX(self.dL_dK, self.X)
|
||||
dL_df = self.kern.gradients_X(self.dL_dK, self.X)
|
||||
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
|
||||
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))
|
||||
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ class GPLVM(GP):
|
|||
def jacobian(self,X):
|
||||
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
|
||||
for i in range(self.output_dim):
|
||||
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
|
||||
target[:,:,i]=self.kern.gradients_X(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
|
||||
return target
|
||||
|
||||
def magnification(self,X):
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
# ## Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from GPy.core.model import Model
|
||||
from ..core.model import Model
|
||||
import itertools
|
||||
import numpy
|
||||
from ..core.parameterization import Param
|
||||
|
||||
def get_shape(x):
|
||||
if isinstance(x, numpy.ndarray):
|
||||
|
|
@ -24,42 +25,42 @@ class GradientChecker(Model):
|
|||
"""
|
||||
:param f: Function to check gradient for
|
||||
:param df: Gradient of function to check
|
||||
:param x0:
|
||||
:param x0:
|
||||
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
|
||||
Can be a list of arrays, if takes a list of arrays. This list will be passed
|
||||
Can be a list of arrays, if takes a list of arrays. This list will be passed
|
||||
to f and df in the same order as given here.
|
||||
If only one argument, make sure not to pass a list!!!
|
||||
|
||||
|
||||
:type x0: [array-like] | array-like | float | int
|
||||
:param names:
|
||||
Names to print, when performing gradcheck. If a list was passed to x0
|
||||
a list of names with the same length is expected.
|
||||
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
|
||||
|
||||
|
||||
Examples:
|
||||
---------
|
||||
from GPy.models import GradientChecker
|
||||
N, M, Q = 10, 5, 3
|
||||
|
||||
|
||||
Sinusoid:
|
||||
|
||||
|
||||
X = numpy.random.rand(N, Q)
|
||||
grad = GradientChecker(numpy.sin,numpy.cos,X,'x')
|
||||
grad.checkgrad(verbose=1)
|
||||
|
||||
|
||||
Using GPy:
|
||||
|
||||
|
||||
X, Z = numpy.random.randn(N,Q), numpy.random.randn(M,Q)
|
||||
kern = GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True)
|
||||
grad = GradientChecker(kern.K,
|
||||
grad = GradientChecker(kern.K,
|
||||
lambda x: 2*kern.dK_dX(numpy.ones((1,1)), x),
|
||||
x0 = X.copy(),
|
||||
names='X')
|
||||
names='X')
|
||||
grad.checkgrad(verbose=1)
|
||||
grad.randomize()
|
||||
grad.checkgrad(verbose=1)
|
||||
grad.checkgrad(verbose=1)
|
||||
"""
|
||||
Model.__init__(self)
|
||||
Model.__init__(self, 'GradientChecker')
|
||||
if isinstance(x0, (list, tuple)) and names is None:
|
||||
self.shapes = [get_shape(xi) for xi in x0]
|
||||
self.names = ['X{i}'.format(i=i) for i in range(len(x0))]
|
||||
|
|
@ -72,8 +73,10 @@ class GradientChecker(Model):
|
|||
else:
|
||||
self.names = names
|
||||
self.shapes = [get_shape(x0)]
|
||||
|
||||
for name, xi in zip(self.names, at_least_one_element(x0)):
|
||||
self.__setattr__(name, xi)
|
||||
self.__setattr__(name, Param(name, xi))
|
||||
self.add_parameter(self.__getattribute__(name))
|
||||
# self._param_names = []
|
||||
# for name, shape in zip(self.names, self.shapes):
|
||||
# self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
|
||||
|
|
@ -93,20 +96,18 @@ class GradientChecker(Model):
|
|||
def _log_likelihood_gradients(self):
|
||||
return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten()
|
||||
|
||||
#def _get_params(self):
|
||||
#return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
|
||||
|
||||
def _get_params(self):
|
||||
return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
|
||||
#def _set_params(self, x):
|
||||
#current_index = 0
|
||||
#for name, shape in zip(self.names, self.shapes):
|
||||
#current_size = numpy.prod(shape)
|
||||
#self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
|
||||
#current_index += current_size
|
||||
|
||||
|
||||
def _set_params(self, x):
|
||||
current_index = 0
|
||||
for name, shape in zip(self.names, self.shapes):
|
||||
current_size = numpy.prod(shape)
|
||||
self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
|
||||
current_index += current_size
|
||||
|
||||
def _get_param_names(self):
|
||||
_param_names = []
|
||||
for name, shape in zip(self.names, self.shapes):
|
||||
_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
|
||||
return _param_names
|
||||
#def _get_param_names(self):
|
||||
#_param_names = []
|
||||
#for name, shape in zip(self.names, self.shapes):
|
||||
#_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
|
||||
#return _param_names
|
||||
|
|
|
|||
|
|
@ -16,44 +16,83 @@ class SparseGPRegression(SparseGP):
|
|||
:param X: input observations
|
||||
:param Y: observed values
|
||||
:param kernel: a GPy kernel, defaults to rbf+white
|
||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||
:type normalize_X: False|True
|
||||
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
|
||||
:type normalize_Y: False|True
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (num_inducing x input_dim) | None
|
||||
:param num_inducing: number of inducing points (ignored if Z is passed, see note)
|
||||
:type num_inducing: int
|
||||
:rtype: model object
|
||||
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
|
||||
:type X_variance: np.ndarray (num_data x input_dim) | None
|
||||
|
||||
.. Note:: If no Z array is passed, num_inducing (default 10) points are selected from the data. Other wise num_inducing is ignored
|
||||
.. Note:: Multiple independent outputs are allowed using columns of Y
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10, X_variance=None):
|
||||
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None):
|
||||
num_data, input_dim = X.shape
|
||||
|
||||
# kern defaults to rbf (plus white for stability)
|
||||
if kernel is None:
|
||||
kernel = kern.rbf(X.shape[1]) # + kern.white(X.shape[1], 1e-3)
|
||||
kernel = kern.rbf(input_dim)# + kern.white(input_dim, variance=1e-3)
|
||||
|
||||
# Z defaults to a subset of the data
|
||||
if Z is None:
|
||||
i = np.random.permutation(X.shape[0])[:num_inducing]
|
||||
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
|
||||
Z = X[i].copy()
|
||||
else:
|
||||
assert Z.shape[1] == X.shape[1]
|
||||
assert Z.shape[1] == input_dim
|
||||
|
||||
# likelihood defaults to Gaussian
|
||||
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
|
||||
likelihood = likelihoods.Gaussian()
|
||||
|
||||
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
|
||||
self.ensure_default_constraints()
|
||||
pass
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance)
|
||||
#self.ensure_default_constraints()
|
||||
|
||||
def _getstate(self):
|
||||
return SparseGP._getstate(self)
|
||||
|
||||
|
||||
def _setstate(self, state):
|
||||
return SparseGP._setstate(self, state)
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class SparseGPRegressionUncertainInput(SparseGP):
|
||||
"""
|
||||
Gaussian Process model for regression with Gaussian variance on the inputs (X_variance)
|
||||
|
||||
This is a thin wrapper around the SparseGP class, with a set of sensible defalts
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10):
|
||||
"""
|
||||
:param X: input observations
|
||||
:type X: np.ndarray (num_data x input_dim)
|
||||
:param X_variance: The uncertainty in the measurements of X (Gaussian variance, optional)
|
||||
:type X_variance: np.ndarray (num_data x input_dim)
|
||||
:param Y: observed values
|
||||
:param kernel: a GPy kernel, defaults to rbf+white
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (num_inducing x input_dim) | None
|
||||
:param num_inducing: number of inducing points (ignored if Z is passed, see note)
|
||||
:type num_inducing: int
|
||||
:rtype: model object
|
||||
|
||||
.. Note:: If no Z array is passed, num_inducing (default 10) points are selected from the data. Other wise num_inducing is ignored
|
||||
.. Note:: Multiple independent outputs are allowed using columns of Y
|
||||
"""
|
||||
num_data, input_dim = X.shape
|
||||
|
||||
# kern defaults to rbf (plus white for stability)
|
||||
if kernel is None:
|
||||
kernel = kern.rbf(input_dim) + kern.white(input_dim, variance=1e-3)
|
||||
|
||||
# Z defaults to a subset of the data
|
||||
if Z is None:
|
||||
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
|
||||
Z = X[i].copy()
|
||||
else:
|
||||
assert Z.shape[1] == input_dim
|
||||
|
||||
likelihood = likelihoods.Gaussian()
|
||||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance)
|
||||
self.ensure_default_constraints()
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
|
|||
|
||||
def dL_dX(self):
|
||||
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
|
||||
dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z)
|
||||
dL_dX += self.kern.gradients_X(self.dL_dpsi1, self.X, self.Z)
|
||||
|
||||
return dL_dX
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import pylab as pb
|
|||
import Tango
|
||||
from matplotlib.textpath import TextPath
|
||||
from matplotlib.transforms import offset_copy
|
||||
from ...kern.parts.linear import Linear
|
||||
|
||||
|
||||
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import pylab as pb
|
|||
import numpy as np
|
||||
import Tango
|
||||
from base_plots import gpplot, x_frame1D, x_frame2D
|
||||
from ...util.misc import param_to_array
|
||||
|
||||
|
||||
def plot_fit(model, plot_limits=None, which_data_rows='all',
|
||||
|
|
@ -95,8 +96,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
|
||||
#add inducing inputs (if a sparse model is used)
|
||||
if hasattr(model,"Z"):
|
||||
Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
|
||||
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
|
||||
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
|
||||
Zu = param_to_array(model.Z[:,free_dims])
|
||||
z_height = ax.get_ylim()[0]
|
||||
ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
|
||||
|
||||
|
||||
#add error bars for uncertain (if input uncertainty is being modelled)
|
||||
if hasattr(model,"has_uncertain_inputs"):
|
||||
|
|
@ -144,18 +148,19 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
|
|||
|
||||
#add inducing inputs (if a sparse model is used)
|
||||
if hasattr(model,"Z"):
|
||||
Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
|
||||
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
|
||||
Zu = model.Z[:,free_dims]
|
||||
ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
|
||||
|
||||
else:
|
||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
||||
|
||||
|
||||
def plot_f_fit(model, *args, **kwargs):
|
||||
def plot_fit_f(model, *args, **kwargs):
|
||||
"""
|
||||
Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
|
||||
|
||||
All args and kwargs are passed on to models_plots.plot.
|
||||
"""
|
||||
kwargs['plot_raw'] = True
|
||||
plot(model,*args, **kwargs)
|
||||
plot_fit(model,*args, **kwargs)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import pylab as pb
|
||||
import pylab as pb, numpy as np
|
||||
from ...util.misc import param_to_array
|
||||
|
||||
def plot(parameterized, fignum=None, ax=None, colors=None):
|
||||
"""
|
||||
|
|
@ -13,14 +14,14 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
|
|||
|
||||
"""
|
||||
if ax is None:
|
||||
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.means.shape[1]))))
|
||||
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
if colors is None:
|
||||
colors = pb.gca()._get_lines.color_cycle
|
||||
pb.clf()
|
||||
else:
|
||||
colors = iter(colors)
|
||||
plots = []
|
||||
means, variances = param_to_array(parameterized.means, parameterized.variances)
|
||||
means, variances = param_to_array(parameterized.mean, parameterized.variance)
|
||||
x = np.arange(means.shape[0])
|
||||
for i in range(means.shape[1]):
|
||||
if ax is None:
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import unittest
|
|||
import numpy as np
|
||||
import GPy
|
||||
|
||||
verbose = False
|
||||
verbose = True
|
||||
|
||||
try:
|
||||
import sympy
|
||||
|
|
|
|||
|
|
@ -4,10 +4,11 @@ import GPy
|
|||
from GPy.models import GradientChecker
|
||||
import functools
|
||||
import inspect
|
||||
from GPy.likelihoods.noise_models import gp_transformations
|
||||
from GPy.likelihoods import link_functions
|
||||
from ..core.parameterization import Param
|
||||
from functools import partial
|
||||
#np.random.seed(300)
|
||||
np.random.seed(7)
|
||||
#np.random.seed(7)
|
||||
|
||||
def dparam_partial(inst_func, *args):
|
||||
"""
|
||||
|
|
@ -22,12 +23,14 @@ def dparam_partial(inst_func, *args):
|
|||
the f or Y that are being used in the function whilst we tweak the
|
||||
param
|
||||
"""
|
||||
def param_func(param, inst_func, args):
|
||||
inst_func.im_self._set_params(param)
|
||||
def param_func(param_val, param_name, inst_func, args):
|
||||
#inst_func.im_self._set_params(param)
|
||||
#inst_func.im_self.add_parameter(Param(param_name, param_val))
|
||||
inst_func.im_self[param_name] = param_val
|
||||
return inst_func(*args)
|
||||
return functools.partial(param_func, inst_func=inst_func, args=args)
|
||||
|
||||
def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False):
|
||||
def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, randomize=False, verbose=False):
|
||||
"""
|
||||
checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
|
||||
However if we are holding other parameters fixed and moving something else
|
||||
|
|
@ -38,27 +41,34 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
|
|||
The number of parameters and N is the number of data
|
||||
Need to take a slice out from f and a slice out of df
|
||||
"""
|
||||
#print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
|
||||
#func.__name__, dfunc.__name__)
|
||||
print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
|
||||
func.__name__, dfunc.__name__)
|
||||
partial_f = dparam_partial(func, *args)
|
||||
partial_df = dparam_partial(dfunc, *args)
|
||||
gradchecking = True
|
||||
for param in params:
|
||||
fnum = np.atleast_1d(partial_f(param)).shape[0]
|
||||
dfnum = np.atleast_1d(partial_df(param)).shape[0]
|
||||
zipped_params = zip(params, params_names)
|
||||
for param_ind, (param_val, param_name) in enumerate(zipped_params):
|
||||
#Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter
|
||||
fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0]
|
||||
dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0]
|
||||
for fixed_val in range(dfnum):
|
||||
#dlik and dlik_dvar gives back 1 value for each
|
||||
f_ind = min(fnum, fixed_val+1) - 1
|
||||
print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
|
||||
#Make grad checker with this param moving, note that set_params is NOT being called
|
||||
#The parameter is being set directly with __setattr__
|
||||
grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
|
||||
lambda x : np.atleast_1d(partial_df(x))[fixed_val],
|
||||
param, 'p')
|
||||
#This is not general for more than one param...
|
||||
#Check only the parameter and function value we wish to check at a time
|
||||
grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind],
|
||||
lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind],
|
||||
param_val, [param_name])
|
||||
|
||||
if constraints is not None:
|
||||
for constraint in constraints:
|
||||
constraint('p', grad)
|
||||
for constrain_param, constraint in constraints:
|
||||
if grad.grep_param_names(constrain_param):
|
||||
constraint(constrain_param, grad)
|
||||
else:
|
||||
print "parameter didn't exist"
|
||||
print constrain_param, " ", constraint
|
||||
if randomize:
|
||||
grad.randomize()
|
||||
if verbose:
|
||||
|
|
@ -76,7 +86,7 @@ class TestNoiseModels(object):
|
|||
Generic model checker
|
||||
"""
|
||||
def setUp(self):
|
||||
self.N = 5
|
||||
self.N = 15
|
||||
self.D = 3
|
||||
self.X = np.random.rand(self.N, self.D)*10
|
||||
|
||||
|
|
@ -94,7 +104,7 @@ class TestNoiseModels(object):
|
|||
self.var = np.random.rand(1)
|
||||
|
||||
#Make a bigger step as lower bound can be quite curved
|
||||
self.step = 1e-3
|
||||
self.step = 1e-4
|
||||
|
||||
def tearDown(self):
|
||||
self.Y = None
|
||||
|
|
@ -107,17 +117,20 @@ class TestNoiseModels(object):
|
|||
####################################################
|
||||
# Constraint wrappers so we can just list them off #
|
||||
####################################################
|
||||
def constrain_fixed(regex, model):
|
||||
model[regex].constrain_fixed()
|
||||
|
||||
def constrain_negative(regex, model):
|
||||
model.constrain_negative(regex)
|
||||
model[regex].constrain_negative()
|
||||
|
||||
def constrain_positive(regex, model):
|
||||
model.constrain_positive(regex)
|
||||
model[regex].constrain_positive()
|
||||
|
||||
def constrain_bounded(regex, model, lower, upper):
|
||||
"""
|
||||
Used like: partial(constrain_bounded, lower=0, upper=1)
|
||||
"""
|
||||
model.constrain_bounded(regex, lower, upper)
|
||||
model[regex].constrain_bounded(lower, upper)
|
||||
|
||||
"""
|
||||
Dictionary where we nest models we would like to check
|
||||
|
|
@ -134,71 +147,81 @@ class TestNoiseModels(object):
|
|||
}
|
||||
"""
|
||||
noise_models = {"Student_t_default": {
|
||||
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": ["t_noise"],
|
||||
"vals": [self.var],
|
||||
"constraints": [constrain_positive]
|
||||
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
|
||||
#"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_1_var": {
|
||||
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": ["t_noise"],
|
||||
"vals": [1.0],
|
||||
"constraints": [constrain_positive]
|
||||
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_small_deg_free": {
|
||||
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": ["t_noise"],
|
||||
"vals": [self.var],
|
||||
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_small_var": {
|
||||
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": ["t_noise"],
|
||||
"vals": [0.01],
|
||||
"constraints": [constrain_positive]
|
||||
"vals": [0.0001],
|
||||
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_large_var": {
|
||||
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
|
||||
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": ["t_noise"],
|
||||
"vals": [10.0],
|
||||
"constraints": [constrain_positive]
|
||||
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_approx_gauss": {
|
||||
"model": GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var),
|
||||
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": ["t_noise"],
|
||||
"vals": [self.var],
|
||||
"constraints": [constrain_positive]
|
||||
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Student_t_log": {
|
||||
"model": GPy.likelihoods.student_t(gp_link=gp_transformations.Log(), deg_free=5, sigma2=self.var),
|
||||
"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
|
||||
"grad_params": {
|
||||
"names": ["t_noise"],
|
||||
"vals": [self.var],
|
||||
"constraints": [constrain_positive]
|
||||
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
|
||||
},
|
||||
"laplace": True
|
||||
},
|
||||
"Gaussian_default": {
|
||||
"model": GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N),
|
||||
"model": GPy.likelihoods.Gaussian(variance=self.var),
|
||||
"grad_params": {
|
||||
"names": ["noise_model_variance"],
|
||||
"names": ["variance"],
|
||||
"vals": [self.var],
|
||||
"constraints": [constrain_positive]
|
||||
"constraints": [("variance", constrain_positive)]
|
||||
},
|
||||
"laplace": True,
|
||||
"ep": True
|
||||
},
|
||||
#"Gaussian_log": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N),
|
||||
#"grad_params": {
|
||||
#"names": ["noise_model_variance"],
|
||||
#"vals": [self.var],
|
||||
|
|
@ -207,7 +230,7 @@ class TestNoiseModels(object):
|
|||
#"laplace": True
|
||||
#},
|
||||
#"Gaussian_probit": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N),
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
|
||||
#"grad_params": {
|
||||
#"names": ["noise_model_variance"],
|
||||
#"vals": [self.var],
|
||||
|
|
@ -216,7 +239,7 @@ class TestNoiseModels(object):
|
|||
#"laplace": True
|
||||
#},
|
||||
#"Gaussian_log_ex": {
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
|
||||
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
|
||||
#"grad_params": {
|
||||
#"names": ["noise_model_variance"],
|
||||
#"vals": [self.var],
|
||||
|
|
@ -225,31 +248,31 @@ class TestNoiseModels(object):
|
|||
#"laplace": True
|
||||
#},
|
||||
"Bernoulli_default": {
|
||||
"model": GPy.likelihoods.bernoulli(),
|
||||
"model": GPy.likelihoods.Bernoulli(),
|
||||
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
|
||||
"laplace": True,
|
||||
"Y": self.binary_Y,
|
||||
"ep": True
|
||||
},
|
||||
"Exponential_default": {
|
||||
"model": GPy.likelihoods.exponential(),
|
||||
"link_f_constraints": [constrain_positive],
|
||||
"Y": self.positive_Y,
|
||||
"laplace": True,
|
||||
},
|
||||
"Poisson_default": {
|
||||
"model": GPy.likelihoods.poisson(),
|
||||
"link_f_constraints": [constrain_positive],
|
||||
"Y": self.integer_Y,
|
||||
"laplace": True,
|
||||
"ep": False #Should work though...
|
||||
},
|
||||
"Gamma_default": {
|
||||
"model": GPy.likelihoods.gamma(),
|
||||
"link_f_constraints": [constrain_positive],
|
||||
"Y": self.positive_Y,
|
||||
"laplace": True
|
||||
}
|
||||
#"Exponential_default": {
|
||||
#"model": GPy.likelihoods.exponential(),
|
||||
#"link_f_constraints": [constrain_positive],
|
||||
#"Y": self.positive_Y,
|
||||
#"laplace": True,
|
||||
#},
|
||||
#"Poisson_default": {
|
||||
#"model": GPy.likelihoods.poisson(),
|
||||
#"link_f_constraints": [constrain_positive],
|
||||
#"Y": self.integer_Y,
|
||||
#"laplace": True,
|
||||
#"ep": False #Should work though...
|
||||
#},
|
||||
#"Gamma_default": {
|
||||
#"model": GPy.likelihoods.gamma(),
|
||||
#"link_f_constraints": [constrain_positive],
|
||||
#"Y": self.positive_Y,
|
||||
#"laplace": True
|
||||
#}
|
||||
}
|
||||
|
||||
for name, attributes in noise_models.iteritems():
|
||||
|
|
@ -286,8 +309,8 @@ class TestNoiseModels(object):
|
|||
else:
|
||||
ep = False
|
||||
|
||||
if len(param_vals) > 1:
|
||||
raise NotImplementedError("Cannot support multiple params in likelihood yet!")
|
||||
#if len(param_vals) > 1:
|
||||
#raise NotImplementedError("Cannot support multiple params in likelihood yet!")
|
||||
|
||||
#Required by all
|
||||
#Normal derivatives
|
||||
|
|
@ -302,13 +325,13 @@ class TestNoiseModels(object):
|
|||
yield self.t_d3logpdf_df3, model, Y, f
|
||||
yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
|
||||
#Params
|
||||
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints
|
||||
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints
|
||||
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints
|
||||
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
#Link params
|
||||
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints
|
||||
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints
|
||||
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints
|
||||
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints
|
||||
|
||||
#laplace likelihood gradcheck
|
||||
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
|
||||
|
|
@ -370,33 +393,33 @@ class TestNoiseModels(object):
|
|||
# df_dparams #
|
||||
##############
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
|
||||
def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
randomize=True, verbose=True)
|
||||
params, params_names, args=(f, Y), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
|
||||
def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
randomize=True, verbose=True)
|
||||
params, params_names, args=(f, Y), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
|
||||
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
randomize=True, verbose=True)
|
||||
params, params_names, args=(f, Y), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
################
|
||||
|
|
@ -454,32 +477,32 @@ class TestNoiseModels(object):
|
|||
# dlink_dparams #
|
||||
#################
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints):
|
||||
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
params, param_names, args=(f, Y), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints):
|
||||
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
params, param_names, args=(f, Y), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
@with_setup(setUp, tearDown)
|
||||
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints):
|
||||
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints):
|
||||
print "\n{}".format(inspect.stack()[0][3])
|
||||
print model
|
||||
assert (
|
||||
dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
|
||||
params, args=(f, Y), constraints=param_constraints,
|
||||
params, param_names, args=(f, Y), constraints=param_constraints,
|
||||
randomize=False, verbose=True)
|
||||
)
|
||||
|
||||
|
|
@ -493,18 +516,23 @@ class TestNoiseModels(object):
|
|||
Y = Y/Y.max()
|
||||
white_var = 1e-6
|
||||
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model)
|
||||
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=laplace_likelihood)
|
||||
laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
|
||||
m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
|
||||
m.ensure_default_constraints()
|
||||
m.constrain_fixed('white', white_var)
|
||||
m['white'].constrain_fixed(white_var)
|
||||
|
||||
for param_num in range(len(param_names)):
|
||||
name = param_names[param_num]
|
||||
m[name] = param_vals[param_num]
|
||||
constraints[param_num](name, m)
|
||||
#Set constraints
|
||||
for constrain_param, constraint in constraints:
|
||||
constraint(constrain_param, m)
|
||||
|
||||
print m
|
||||
m.randomize()
|
||||
|
||||
#Set params
|
||||
for param_num in range(len(param_names)):
|
||||
name = param_names[param_num]
|
||||
m[name] = param_vals[param_num]
|
||||
|
||||
#m.optimize(max_iters=8)
|
||||
print m
|
||||
m.checkgrad(verbose=1, step=step)
|
||||
|
|
@ -525,10 +553,10 @@ class TestNoiseModels(object):
|
|||
Y = Y/Y.max()
|
||||
white_var = 1e-6
|
||||
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
|
||||
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood)
|
||||
ep_inf = GPy.inference.latent_function_inference.EP()
|
||||
m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
|
||||
m.ensure_default_constraints()
|
||||
m.constrain_fixed('white', white_var)
|
||||
m['white'].constrain_fixed(white_var)
|
||||
|
||||
for param_num in range(len(param_names)):
|
||||
name = param_names[param_num]
|
||||
|
|
@ -559,8 +587,8 @@ class LaplaceTests(unittest.TestCase):
|
|||
self.var = 0.2
|
||||
|
||||
self.var = np.random.rand(1)
|
||||
self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var)
|
||||
self.gauss = GPy.likelihoods.gaussian(gp_transformations.Log(), variance=self.var, D=self.D, N=self.N)
|
||||
self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var)
|
||||
self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
|
||||
|
||||
#Make a bigger step as lower bound can be quite curved
|
||||
self.step = 1e-6
|
||||
|
|
@ -584,7 +612,7 @@ class LaplaceTests(unittest.TestCase):
|
|||
noise = np.random.randn(*self.X.shape)*self.real_std
|
||||
self.Y = np.sin(self.X*2*np.pi) + noise
|
||||
self.f = np.random.rand(self.N, 1)
|
||||
self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)
|
||||
self.gauss = GPy.likelihoods.Gaussian(variance=self.var)
|
||||
|
||||
dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
|
||||
d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
|
||||
|
|
@ -605,23 +633,27 @@ class LaplaceTests(unittest.TestCase):
|
|||
#Yc = Y.copy()
|
||||
#Yc[75:80] += 1
|
||||
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
kernel2 = kernel1.copy()
|
||||
#FIXME: Make sure you can copy kernels when params is fixed
|
||||
#kernel2 = kernel1.copy()
|
||||
kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
|
||||
|
||||
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
|
||||
m1.constrain_fixed('white', 1e-6)
|
||||
m1['noise'] = initial_var_guess
|
||||
m1.constrain_bounded('noise', 1e-4, 10)
|
||||
m1.constrain_bounded('rbf', 1e-4, 10)
|
||||
gauss_distr1 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
|
||||
exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference()
|
||||
m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf)
|
||||
m1['white'].constrain_fixed(1e-6)
|
||||
m1['variance'] = initial_var_guess
|
||||
m1['variance'].constrain_bounded(1e-4, 10)
|
||||
m1['rbf'].constrain_bounded(1e-4, 10)
|
||||
m1.ensure_default_constraints()
|
||||
m1.randomize()
|
||||
|
||||
gauss_distr = GPy.likelihoods.gaussian(variance=initial_var_guess, D=1, N=Y.shape[0])
|
||||
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), gauss_distr)
|
||||
m2 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel2, likelihood=laplace_likelihood)
|
||||
gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
|
||||
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
|
||||
m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
|
||||
m2.ensure_default_constraints()
|
||||
m2.constrain_fixed('white', 1e-6)
|
||||
m2.constrain_bounded('rbf', 1e-4, 10)
|
||||
m2.constrain_bounded('noise', 1e-4, 10)
|
||||
m2['white'].constrain_fixed(1e-6)
|
||||
m2['rbf'].constrain_bounded(1e-4, 10)
|
||||
m2['variance'].constrain_bounded(1e-4, 10)
|
||||
m2.randomize()
|
||||
|
||||
if debug:
|
||||
|
|
@ -667,7 +699,7 @@ class LaplaceTests(unittest.TestCase):
|
|||
|
||||
|
||||
#Check Y's are the same
|
||||
np.testing.assert_almost_equal(Y, m2.likelihood.Y, decimal=5)
|
||||
np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5)
|
||||
#Check marginals are the same
|
||||
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
|
||||
#Check marginals are the same with random
|
||||
|
|
|
|||
|
|
@ -9,42 +9,44 @@ import numpy
|
|||
import GPy
|
||||
import itertools
|
||||
from GPy.core import Model
|
||||
from GPy.core.parameterization.param import Param
|
||||
from GPy.core.parameterization.transformations import Logexp
|
||||
|
||||
class PsiStatModel(Model):
|
||||
def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
|
||||
super(PsiStatModel, self).__init__(name='psi stat test')
|
||||
self.which = which
|
||||
self.X = X
|
||||
self.X_variance = X_variance
|
||||
self.Z = Z
|
||||
self.X = Param("X", X)
|
||||
self.X_variance = Param('X_variance', X_variance, Logexp())
|
||||
self.Z = Param("Z", Z)
|
||||
self.N, self.input_dim = X.shape
|
||||
self.num_inducing, input_dim = Z.shape
|
||||
assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
|
||||
self.kern = kernel
|
||||
super(PsiStatModel, self).__init__()
|
||||
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
|
||||
def _get_param_names(self):
|
||||
Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))]
|
||||
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))]
|
||||
return Xnames + Znames + self.kern._get_param_names()
|
||||
def _get_params(self):
|
||||
return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
|
||||
def _set_params(self, x, save_old=True, save_count=0):
|
||||
start, end = 0, self.X.size
|
||||
self.X = x[start:end].reshape(self.N, self.input_dim)
|
||||
start, end = end, end + self.X_variance.size
|
||||
self.X_variance = x[start: end].reshape(self.N, self.input_dim)
|
||||
start, end = end, end + self.Z.size
|
||||
self.Z = x[start: end].reshape(self.num_inducing, self.input_dim)
|
||||
self.kern._set_params(x[end:])
|
||||
self.add_parameters(self.X, self.X_variance, self.Z, self.kern)
|
||||
|
||||
def log_likelihood(self):
|
||||
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
|
||||
def _log_likelihood_gradients(self):
|
||||
|
||||
def parameters_changed(self):
|
||||
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
|
||||
self.X.gradient = psimu
|
||||
self.X_variance.gradient = psiS
|
||||
#psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
|
||||
psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
|
||||
try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
|
||||
except AttributeError: psiZ = numpy.zeros_like(self.Z)
|
||||
self.Z.gradient = psiZ
|
||||
#psiZ = numpy.ones(self.num_inducing * self.input_dim)
|
||||
thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
|
||||
return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
|
||||
N,M = self.X.shape[0], self.Z.shape[0]
|
||||
dL_dpsi0, dL_dpsi1, dL_dpsi2 = numpy.zeros([N]), numpy.zeros([N,M]), numpy.zeros([N,M,M])
|
||||
if self.which == 'psi0': dL_dpsi0 += 1
|
||||
if self.which == 'psi1': dL_dpsi1 += 1
|
||||
if self.which == 'psi2': dL_dpsi2 += 1
|
||||
self.kern.update_gradients_variational(numpy.zeros([1,1]),
|
||||
dL_dpsi0,
|
||||
dL_dpsi1,
|
||||
dL_dpsi2, self.X, self.X_variance, self.Z)
|
||||
|
||||
class DPsiStatTest(unittest.TestCase):
|
||||
input_dim = 5
|
||||
|
|
@ -57,61 +59,58 @@ class DPsiStatTest(unittest.TestCase):
|
|||
Y = X.dot(numpy.random.randn(input_dim, input_dim))
|
||||
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
|
||||
|
||||
kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim),
|
||||
GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
|
||||
GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)]
|
||||
kernels = [
|
||||
GPy.kern.linear(input_dim),
|
||||
GPy.kern.rbf(input_dim),
|
||||
#GPy.kern.bias(input_dim),
|
||||
#GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
|
||||
#GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)
|
||||
]
|
||||
|
||||
def testPsi0(self):
|
||||
for k in self.kernels:
|
||||
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\
|
||||
num_inducing=self.num_inducing, kernel=k)
|
||||
m.ensure_default_constraints()
|
||||
m.randomize()
|
||||
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
|
||||
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||
|
||||
def testPsi1(self):
|
||||
for k in self.kernels:
|
||||
m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||
num_inducing=self.num_inducing, kernel=k)
|
||||
m.ensure_default_constraints()
|
||||
m.randomize()
|
||||
assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
|
||||
assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||
|
||||
def testPsi2_lin(self):
|
||||
k = self.kernels[0]
|
||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||
num_inducing=self.num_inducing, kernel=k)
|
||||
m.ensure_default_constraints()
|
||||
m.randomize()
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||
def testPsi2_lin_bia(self):
|
||||
k = self.kernels[3]
|
||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||
num_inducing=self.num_inducing, kernel=k)
|
||||
m.ensure_default_constraints()
|
||||
m.randomize()
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||
def testPsi2_rbf(self):
|
||||
k = self.kernels[1]
|
||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||
num_inducing=self.num_inducing, kernel=k)
|
||||
m.ensure_default_constraints()
|
||||
m.randomize()
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||
def testPsi2_rbf_bia(self):
|
||||
k = self.kernels[-1]
|
||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||
num_inducing=self.num_inducing, kernel=k)
|
||||
m.ensure_default_constraints()
|
||||
m.randomize()
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||
def testPsi2_bia(self):
|
||||
k = self.kernels[2]
|
||||
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
|
||||
num_inducing=self.num_inducing, kernel=k)
|
||||
m.ensure_default_constraints()
|
||||
m.randomize()
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
|
||||
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import datasets
|
|||
import mocap
|
||||
import decorators
|
||||
import classification
|
||||
import caching
|
||||
|
||||
try:
|
||||
import sympy
|
||||
|
|
|
|||
40
GPy/util/caching.py
Normal file
40
GPy/util/caching.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
from ..core.parameterization.array_core import ObservableArray, ParamList
|
||||
class Cacher(object):
|
||||
def __init__(self, operation, limit=5):
|
||||
self.limit = int(limit)
|
||||
self.operation=operation
|
||||
self.cached_inputs = ParamList([])
|
||||
self.cached_outputs = []
|
||||
self.inputs_changed = []
|
||||
|
||||
def __call__(self, X):
|
||||
assert isinstance(X, ObservableArray)
|
||||
if X in self.cached_inputs:
|
||||
i = self.cached_inputs.index(X)
|
||||
if self.inputs_changed[i]:
|
||||
self.cached_outputs[i] = self.operation(X)
|
||||
self.inputs_changed[i] = False
|
||||
return self.cached_outputs[i]
|
||||
else:
|
||||
if len(self.cached_inputs) == self.limit:
|
||||
X_ = self.cached_inputs.pop(0)
|
||||
X_.remove_observer(self)
|
||||
self.inputs_changed.pop(0)
|
||||
self.cached_outputs.pop(0)
|
||||
|
||||
self.cached_inputs.append(X)
|
||||
self.cached_outputs.append(self.operation(X))
|
||||
self.inputs_changed.append(False)
|
||||
X.add_observer(self, self.on_cache_changed)
|
||||
return self.cached_outputs[-1]
|
||||
|
||||
def on_cache_changed(self, X):
|
||||
print id(X)
|
||||
i = self.cached_inputs.index(X)
|
||||
self.inputs_changed[i] = True
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -41,6 +41,17 @@ else:
|
|||
_blas_available = False
|
||||
warnings.warn("warning: caught this exception:" + str(e))
|
||||
|
||||
def dtrtri(L, lower=0):
|
||||
"""
|
||||
Wrapper for lapack dtrtrs function
|
||||
Inverse of L
|
||||
|
||||
:param L: Triangular Matrix L
|
||||
:param lower: is matrix lower (true) or upper (false)
|
||||
:returns: Li, info
|
||||
"""
|
||||
return lapack.dtrtri(L, lower=lower)
|
||||
|
||||
def dtrtrs(A, B, lower=0, trans=0, unitdiag=0):
|
||||
"""
|
||||
Wrapper for lapack dtrtrs function
|
||||
|
|
|
|||
|
|
@ -184,4 +184,4 @@ from :class:ndarray)"""
|
|||
assert len(param) > 0, "At least one parameter needed"
|
||||
if len(param) == 1:
|
||||
return param[0].view(np.ndarray)
|
||||
return map(lambda x: x.view(np.ndarray), param)
|
||||
return [x.view(np.ndarray) for x in param]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue