Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-09 03:52:39 +02:00)

Commit b72d319909: "Merged parameterized fixing"
41 changed files with 248 additions and 270 deletions
@@ -439,6 +439,8 @@ class Model(Parameterized):
print "No free parameters to check"
return
gradient = self.objective_function_gradients(x)
np.where(gradient==0, 1e-312, gradient)
for i in param_list:
xx = x.copy()

@@ -446,11 +448,10 @@ class Model(Parameterized):
f1, g1 = self.objective_and_gradients(xx)
xx[i] -= 2.*step
f2, g2 = self.objective_and_gradients(xx)
gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
difference = np.abs((f1 - f2) / 2 / step - gradient)
ratio = (f1 - f2) / (2 * step * gradient[i])
difference = np.abs((f1 - f2) / 2 / step - gradient[i])
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
formatted_name = "\033[92m {0} \033[0m".format(names[i])

@@ -458,7 +459,7 @@ class Model(Parameterized):
formatted_name = "\033[91m {0} \033[0m".format(names[i])
r = '%.6f' % float(ratio)
d = '%.6f' % float(difference)
g = '%.6f' % gradient
g = '%.6f' % gradient[i]
ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string
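For context, these hunks adjust GPy's checkgrad-style test, which compares the analytic gradient of the objective against a central finite difference. A minimal, self-contained sketch of the idea (plain numpy, illustrative names, not GPy's exact implementation):

import numpy as np

def check_gradient_component(f, grad, x, i, step=1e-6, tolerance=1e-3):
    # central difference along parameter i
    xx = x.copy()
    xx[i] += step
    f1 = f(xx)
    xx[i] -= 2. * step
    f2 = f(xx)
    numerical = (f1 - f2) / (2. * step)
    analytic = grad(x)[i]
    # guard against dividing by an exactly-zero analytic gradient
    ratio = numerical / np.where(analytic == 0., 1e-312, analytic)
    difference = np.abs(numerical - analytic)
    return (np.abs(1. - ratio) < tolerance) or (difference < tolerance)

The check passes when either the ratio is close to one or the absolute difference is below the tolerance, which is exactly the condition printed in green above.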
@@ -15,8 +15,18 @@ class ListArray(np.ndarray):
def __new__(cls, input_array):
obj = np.asanyarray(input_array).view(cls)
return obj
def __eq__(self, other):
return other is self
#def __eq__(self, other):
# return other is self

class ParamList(list):
def __contains__(self, other):
for el in self:
if el is other:
return True
return False
pass

class ObservableArray(ListArray, Observable):
"""

@@ -36,16 +46,19 @@ class ObservableArray(ListArray, Observable):
if obj is None: return
self._observers_ = getattr(obj, '_observers_', None)
def __setitem__(self, s, val, update=True):
if self.ndim:
if not np.all(np.equal(self[s], val)):
super(ObservableArray, self).__setitem__(s, val)
if update:
self._notify_observers()
else:
if not np.all(np.equal(self, val)):
super(ObservableArray, self).__setitem__(Ellipsis, val)
if update:
self._notify_observers()
# if self.ndim:
# if not np.all(np.equal(self[s], val)):
# super(ObservableArray, self).__setitem__(s, val)
# if update:
# self._notify_observers()
# else:
# if not np.all(np.equal(self, val)):
# super(ObservableArray, self).__setitem__(Ellipsis, val)
# if update:
# self._notify_observers()
def __getslice__(self, start, stop):
return self.__getitem__(slice(start, stop))
def __setslice__(self, start, stop, val):
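The new ParamList overrides __contains__ so that membership is decided by object identity rather than by value. With numpy-array elements this matters: a plain list's `in` would invoke the arrays' broadcasting __eq__ and either raise or match arrays that merely hold equal values. A small hedged illustration of the behaviour (plain numpy, independent of GPy):

import numpy as np

class ParamList(list):
    # membership by identity, so numpy's element-wise __eq__ is never invoked
    def __contains__(self, other):
        return any(el is other for el in self)

a = np.zeros(3)
params = ParamList([a])
print(a in params)         # True: the very same object
print(a.copy() in params)  # False: equal values but a different object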
@@ -90,11 +90,6 @@ class ParameterIndexOperations(object):
return self._properties.values()
def properties_for(self, index):
# already_seen = dict()
# for ni in index:
# if ni not in already_seen:
# already_seen[ni] = [prop for prop in self.iter_properties() if ni in self._properties[prop]]
# yield already_seen[ni]
return vectorize(lambda i: [prop for prop in self.iter_properties() if i in self._properties[prop]], otypes=[list])(index)
def add(self, prop, indices):

@@ -111,70 +106,11 @@ class ParameterIndexOperations(object):
self._properties[prop] = diff
else:
del self._properties[prop]
#[self._reverse[i].remove(prop) for i in removed if prop in self._reverse[i]]
return removed.astype(int)
# else:
# for a in self.properties():
# if numpy.all(a==prop) and a._parent_index_ == prop._parent_index_:
# ind = create_raveled_indices(indices, shape, offset)
# diff = remove_indices(self[a], ind)
# removed = numpy.intersect1d(self[a], ind, True)
# if not index_empty(diff):
# self._properties[a] = diff
# else:
# del self._properties[a]
# [self._reverse[i].remove(a) for i in removed if a in self._reverse[i]]
# return removed.astype(int)
return numpy.array([]).astype(int)
def __getitem__(self, prop):
return self._properties[prop]

# class TieIndexOperations(object):
# def __init__(self, params):
# self.params = params
# self.tied_from = ParameterIndexOperations()
# self.tied_to = ParameterIndexOperations()
# def add(self, tied_from, tied_to):
# rav_from = self.params._raveled_index_for(tied_from)
# rav_to = self.params._raveled_index_for(tied_to)
# self.tied_from.add(tied_to, rav_from)
# self.tied_to.add(tied_to, rav_to)
# return rav_from, rav_to
# def remove(self, tied_from, tied_to):
# rav_from = self.params._raveled_index_for(tied_from)
# rav_to = self.params._raveled_index_for(tied_to)
# rem_from = self.tied_from.remove(tied_to, rav_from)
# rem_to = self.tied_to.remove(tied_to, rav_to)
# left_from = self.tied_from._properties.pop(tied_to)
# left_to = self.tied_to._properties.pop(tied_to)
# self.tied_from[numpy.delete(tied_to, rem_from)] = left_from
# self.tied_to[numpy.delete(tied_to, rem_to)] = left_to
# return rav_from, rav_to
# def from_to_for(self, index):
# return self.tied_from.properties_for(index), self.tied_to.properties_for(index)
# def iter_from_to_indices(self):
# for k, f in self.tied_from.iteritems():
# yield f, self.tied_to[k]
# def iter_to_indices(self):
# return self.tied_to.iterindices()
# def iter_from_indices(self):
# return self.tied_from.iterindices()
# def iter_from_items(self):
# for f, i in self.tied_from.iteritems():
# yield f, i
# def iter_properties(self):
# return self.tied_from.iter_properties()
# def properties(self):
# return self.tied_from.properties()
# def from_to_indices(self, param):
# return self.tied_from[param], self.tied_to[param]
#
# # def create_raveled_indices(index, shape, offset=0):
# # if isinstance(index, (tuple, list)): i = [slice(None)] + list(index)
# # else: i = [slice(None), index]
# # ind = numpy.array(numpy.ravel_multi_index(numpy.indices(shape)[i], shape)).flat + numpy.int_(offset)
# # return ind

def combine_indices(arr1, arr2):
return numpy.union1d(arr1, arr2)
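The rewritten properties_for maps every raveled parameter index to the list of constraints covering it in a single numpy.vectorize call with otypes=[list]. A standalone sketch of that pattern (the constraint table here is made up for illustration):

import numpy as np

# hypothetical table: constraint name -> raveled indices it applies to
properties = {'positive': np.array([0, 1, 2]), 'fixed': np.array([2, 3])}

properties_for = np.vectorize(
    lambda i: [p for p, idx in properties.items() if i in idx],
    otypes=[list])

print(properties_for(np.array([0, 2, 4])))
# object array, roughly: [['positive'], ['positive', 'fixed'], []]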
@@ -75,7 +75,6 @@ class Param(ObservableArray, Constrainable):
super(Param, self).__array_finalize__(obj)
self._direct_parent_ = getattr(obj, '_direct_parent_', None)
self._parent_index_ = getattr(obj, '_parent_index_', None)
self._highest_parent_ = getattr(obj, '_highest_parent_', None)
self._current_slice_ = getattr(obj, '_current_slice_', None)
self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
self._tied_to_ = getattr(obj, '_tied_to_', None)

@@ -98,7 +97,6 @@ class Param(ObservableArray, Constrainable):
(self.name,
self._direct_parent_,
self._parent_index_,
self._highest_parent_,
self._current_slice_,
self._realshape_,
self._realsize_,

@@ -119,7 +117,6 @@ class Param(ObservableArray, Constrainable):
self._realsize_ = state.pop()
self._realshape_ = state.pop()
self._current_slice_ = state.pop()
self._highest_parent_ = state.pop()
self._parent_index_ = state.pop()
self._direct_parent_ = state.pop()
self.name = state.pop()

@@ -153,8 +150,6 @@ class Param(ObservableArray, Constrainable):
@property
def _parameters_(self):
return []
def _connect_highest_parent(self, highest_parent):
self._highest_parent_ = highest_parent
def _collect_gradient(self, target):
target[:] = self.gradient.flat
#===========================================================================

@@ -456,7 +451,7 @@ class ParamConcatenation(object):
See :py:class:`GPy.core.parameter.Param` for more details on constraining.
"""
# self.params = params
self.params = []
self.params = ParamList([])
for p in params:
for p in p.flattened_parameters:
if p not in self.params:
@@ -48,19 +48,25 @@ class Pickleable(object):
#===============================================================================

class Parentable(object):
def __init__(self, direct_parent=None, highest_parent=None, parent_index=None):
def __init__(self, direct_parent=None, parent_index=None):
super(Parentable,self).__init__()
self._direct_parent_ = direct_parent
self._parent_index_ = parent_index
self._highest_parent_ = highest_parent

def has_parent(self):
return self._direct_parent_ is not None and self._highest_parent_ is not None
return self._direct_parent_ is not None

@property
def _highest_parent_(self):
if self._direct_parent_ is None:
return self
return self._direct_parent_._highest_parent_

class Nameable(Parentable):
_name = None
def __init__(self, name, direct_parent=None, highest_parent=None, parent_index=None):
super(Nameable,self).__init__(direct_parent, highest_parent, parent_index)
def __init__(self, name, direct_parent=None, parent_index=None):
super(Nameable,self).__init__(direct_parent, parent_index)
self._name = name or self.__class__.__name__

@property
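This hunk is the core of the "parameterized fixing": the stored _highest_parent_ attribute becomes a read-only property that walks up the parent chain on demand, so the root no longer has to be propagated through every node whenever the hierarchy changes. A minimal sketch of the pattern with a usage check (simplified, not the full GPy class):

class Parentable(object):
    def __init__(self, direct_parent=None, parent_index=None):
        self._direct_parent_ = direct_parent
        self._parent_index_ = parent_index

    def has_parent(self):
        return self._direct_parent_ is not None

    @property
    def _highest_parent_(self):
        # recurse to the root of the parameter hierarchy
        if self._direct_parent_ is None:
            return self
        return self._direct_parent_._highest_parent_

root = Parentable()
child = Parentable(direct_parent=root)
grandchild = Parentable(direct_parent=child)
assert grandchild._highest_parent_ is root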
@@ -11,6 +11,7 @@ from param import ParamConcatenation, Param
from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing
from index_operations import ParameterIndexOperations,\
index_empty
from array_core import ParamList

#===============================================================================
# Printing:

@@ -69,7 +70,7 @@ class Parameterized(Constrainable, Pickleable, Observable):
super(Parameterized, self).__init__(name=name)
self._in_init_ = True
self._constraints_ = None#ParameterIndexOperations()
self._parameters_ = []
self._parameters_ = ParamList()
self.size = sum(p.size for p in self._parameters_)
if not self._has_fixes():
self._fixes_ = None

@@ -188,10 +189,10 @@ class Parameterized(Constrainable, Pickleable, Observable):
note: if it is a string object it will not (!) be regexp-matched
automatically.
"""
self._parameters_ = [p for p in self._parameters_
self._parameters_ = ParamList([p for p in self._parameters_
if not (p._parent_index_ in names_params_indices
or p.name in names_params_indices
or p in names_params_indices)]
or p in names_params_indices)])
self._connect_parameters()

def parameters_changed(self):

@@ -216,7 +217,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
for i,p in enumerate(self._parameters_):
p._direct_parent_ = self
p._parent_index_ = i
p._connect_highest_parent(self)
not_unique = []
sizes.append(p.size+sizes[-1])
self._param_slices_.append(slice(sizes[-2], sizes[-1]))

@@ -231,14 +231,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
self.__dict__[pname] = p
self._added_names_.add(pname)

def _connect_highest_parent(self, highest_parent):
self._highest_parent_ = highest_parent
if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
# no parameters for this class
return
for p in self._parameters_:
p._connect_highest_parent(highest_parent)

#===========================================================================
# Pickling operations
#===========================================================================

@@ -382,6 +374,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
that is an int array, containing the indexes for the flattened
param inside this parameterized logic.
"""
if isinstance(param, ParamConcatenation):
return numpy.hstack((self._raveled_index_for(p) for p in param.params))
return param._raveled_index() + self._offset_for(param)

def _raveled_index(self):

@@ -419,10 +413,10 @@ class Parameterized(Constrainable, Pickleable, Observable):
#===========================================================================
# Fixing parameters:
#===========================================================================
def _fix(self, param, warning=True, update=True):
def _fix(self, param, warning=True):
f = self._add_constrain(param, __fixed__, warning)
self._set_fixed(f)
def _unfix(self, param, update=True):
def _unfix(self, param):
if self._has_fixes():
f = self._remove_constrain(param, __fixed__)
self._set_unfixed(f)

@@ -446,8 +440,7 @@ class Parameterized(Constrainable, Pickleable, Observable):
# if advanced indexing is activated it happens that the array is a copy
# you can retrieve the original param through this method, by passing
# the copy here
#return self._parameters_[param._parent_index_]
return param._direct_parent_._parameters_[param._parent_index_]
return self._parameters_[param._parent_index_]
def hirarchy_name(self):
if self.has_parent():
return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name) + "."
@@ -3,10 +3,8 @@ Created on 6 Nov 2013

@author: maxz
'''
import numpy as np
from parameterized import Parameterized
from param import Param
from ...util.misc import param_to_array

class Normal(Parameterized):
'''

@@ -26,6 +24,7 @@ class Normal(Parameterized):

See GPy.plotting.matplot_dep.variational_plots
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import variational_plots
from ...plotting.matplot_dep import variational_plots
return variational_plots.plot(self,*args)
@@ -15,54 +15,54 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
num_inducing = 5
if plot:
output_dim = 1
input_dim = 2
input_dim = 3
else:
input_dim = 2
input_dim = 1
output_dim = 25

# generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim)
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
+ GPy.kern.white(input_dim, 0.01))
#+ GPy.kern.white(input_dim, 0.01)
)
K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T
lik = Gaussian(Y, normalize=True)

k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)

m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
#===========================================================================
# randomly obstruct data with percentage p
p = .8
Y_obstruct = Y.copy()
Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan
#===========================================================================
m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
#m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales

if plot:
import matplotlib.pyplot as pb
m.plot()
pb.title('PCA initialisation')
m2.plot()
pb.title('PCA initialisation')
#m2.plot()
#pb.title('PCA initialisation')

if optimize:
m.optimize('scg', messages=verbose)
m2.optimize('scg', messages=verbose)
#m2.optimize('scg', messages=verbose)
if plot:
m.plot()
pb.title('After optimisation')
m2.plot()
pb.title('After optimisation')
#m2.plot()
#pb.title('After optimisation')

return m, m2
return m

def gplvm_oil_100(optimize=True, verbose=1, plot=True):
import GPy

@@ -264,16 +264,16 @@ def bgplvm_simulation(optimize=True, verbose=1,
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
m['noise'] = Y.var() / 100.
m.Gaussian_noise = Y.var() / 100.

if optimize:
print "Optimizing model:"
m.optimize('scg', messages=verbose, max_iters=max_iters,
gtol=.05)
if plot:
m.plot_X_1d("BGPLVM Latent Space 1D")
m.q.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m
@@ -118,8 +118,8 @@ class FITC(SparseGP):
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])

# the partial derivative vector for the likelihood
if self.likelihood.num_params == 0:

@@ -170,8 +170,8 @@ class FITC(SparseGP):
return dL_dtheta

def dL_dZ(self):
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ = self.kern.gradients_X(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += self.kern.gradients_X(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ

@@ -80,7 +80,7 @@ class VarDTC(object):
# no backsubstitution because of bound explosion on tr(A) if not...
LmInv, _ = dtrtri(Lm, lower=1)
A = LmInv.T.dot(psi2_beta.dot(LmInv))
print A.sum()
#print A.sum()
else:
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
@@ -160,6 +160,9 @@ class kern(Parameterized):

return newkern

def __call__(self, X, X2=None):
return self.K(X, X2)

def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)

@@ -550,7 +553,7 @@ class Kern_check_dK_dX(Kern_check_model):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)

def _log_likelihood_gradients(self):
return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()

def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]

@@ -657,7 +660,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:

@@ -673,7 +676,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:

@@ -689,7 +692,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
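Besides the rename, the first hunk adds __call__, so a kernel object can be applied directly to its inputs: k(X, X2) becomes shorthand for k.K(X, X2). A hedged usage sketch, assuming the lowercase kernel constructors GPy used at this point in its history:

import numpy as np
import GPy

X = np.random.rand(10, 2)
k = GPy.kern.rbf(2)        # RBF kernel on 2-D inputs
K_explicit = k.K(X)        # the established call
K_shorthand = k(X)         # the __call__ added in this commit
assert np.allclose(K_explicit, K_shorthand)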
@@ -51,7 +51,7 @@ class Brownian(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.dot(X.flatten(), dL_dKdiag)

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
raise NotImplementedError, "TODO"
#target += self.variance
#target -= self.variance*theta(X-X2.T)

@@ -96,7 +96,7 @@ class Matern32(Kernpart):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)

def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]

@@ -105,8 +105,8 @@ class Matern32(Kernpart):
else:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)

def dKdiag_dX(self, dL_dKdiag, X, target):
pass

@@ -96,7 +96,7 @@ class Matern52(Kernpart):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]

@@ -104,8 +104,8 @@ class Matern52(Kernpart):
else:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(gradients_X*dL_dK.T[:,:,None],0)

def dKdiag_dX(self,dL_dKdiag,X,target):
pass

@@ -193,7 +193,7 @@ class Eq_ode1(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,index,target):
pass

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
pass

def _extract_t_indices(self, X, X2=None, dL_dK=None):

@@ -95,13 +95,13 @@ class Exponential(Kernpart):
# NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(dL_dKdiag)

def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)

def dKdiag_dX(self, dL_dKdiag, X, target):
pass

@@ -34,7 +34,7 @@ class Fixed(Kernpart):
def dK_dtheta(self, partial, X, X2, target):
target += (partial * self.fixed_K).sum()

def dK_dX(self, partial, X, X2, target):
def gradients_X(self, partial, X, X2, target):
pass

def dKdiag_dX(self, partial, X, target):

@@ -97,7 +97,7 @@ class Gibbs(Kernpart):

target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])

def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2)

@@ -105,8 +105,8 @@ class Gibbs(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK)
if X2 is None:

@@ -90,7 +90,7 @@ class Hetero(Kernpart):
"""Gradient of diagonal of covariance with respect to parameters."""
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)

def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
if X2==None or X2 is X:
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]

@@ -55,14 +55,14 @@ class Hierarchical(Kernpart):
[[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
raise NotImplementedError
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
#if X2 is None:
#X2,slices2 = X,slices
#else:
#X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
#[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
#
def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError

@@ -79,13 +79,13 @@ class IndependentOutputs(Kernpart):
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]

def dKdiag_dX(self,dL_dKdiag,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])

@@ -20,7 +20,6 @@ class Kernpart(Parameterized):
# the number of optimisable parameters
# the name of the covariance function.
# link to parameterized objects
self._parameters_ = []
#self._X = None

def connect_input(self, X):

@@ -106,7 +105,7 @@ class Kernpart(Parameterized):
raise NotImplementedError
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError
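The kernel hunks here and below are one mechanical rename: the Kernpart method dK_dX becomes gradients_X, with the same signature and the same contract, namely accumulating dL/dX[i] = sum_j dL_dK[i, j] * dK(x_i, x2_j)/dx_i into target. A hedged, self-contained sketch of that contract for an RBF-style kernel (plain numpy, illustrative names, not the GPy classes):

import numpy as np

def rbf_gradients_X(dL_dK, X, X2, variance=1.0, lengthscale=1.0):
    # K[i, j] = variance * exp(-0.5 * ||x_i - x2_j||^2 / lengthscale**2)
    diff = X[:, None, :] - X2[None, :, :]                            # (N, M, Q)
    K = variance * np.exp(-0.5 * np.sum(diff ** 2, -1) / lengthscale ** 2)
    dK_dX = -K[:, :, None] * diff / lengthscale ** 2                 # dK[i, j]/dx_i
    # chain rule: contract the partials against dL_dK over the second axis
    return np.sum(dL_dK[:, :, None] * dK_dX, axis=1)                 # (N, Q)

When X2 is X itself, x_i also appears as the second argument and the symmetric contribution has to be added as well, which is why several of the parts above double the distance term in that branch.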
@@ -57,6 +57,27 @@ class Linear(Kernpart):
def on_input_change(self, X):
self._K_computations(X, None)

def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)

# psi0:
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD: self.variances.gradient = tmp.sum(0)
else: self.variances.gradient = tmp.sum()

#psi1
self.dK_dtheta(dL_dpsi1, mu, Z, self.variances.gradient)

#from psi2
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: self.variances.gradient += tmp.sum()

#from Kmm
self._K_computations(Z, None)
self.dK_dtheta(dL_dKmm, Z, None, self.variances.gradient)


# def _get_params(self):
# return self.variances
#

@@ -107,7 +128,7 @@ class Linear(Kernpart):
else:
target += tmp.sum()

def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:

@@ -150,7 +171,7 @@ class Linear(Kernpart):
target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)

def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self.dK_dX(dL_dpsi1.T, Z, mu, target)
self.gradients_X(dL_dpsi1.T, Z, mu, target)

def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)

@@ -182,7 +203,7 @@ class Linear(Kernpart):
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self.dK_dX(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)

Zs = Z*self.variances
Zs_sq = Zs[:,None,:]*Zs[None,:,:]
@@ -107,7 +107,7 @@ class MLP(Kernpart):

target[0] += np.sum(self._K_dvar*dL_dK)

def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_asin_arg

@@ -99,7 +99,7 @@ class POLY(Kernpart):
target[2] += base_cov_grad.sum()


def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg

@@ -81,21 +81,21 @@ class Prod(Kernpart):
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
if X2 is None:
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
#NOTE The indices column in the inputs makes the ki.dK_dX fail when passing None instead of X[:,self.slicei]
#NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
X2 = X
self.k1.dK_dX(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
else:
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])

def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])

@@ -103,8 +103,8 @@ class Prod(Kernpart):
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)

self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])

def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):

@@ -67,11 +67,11 @@ class prod_orthogonal(Kernpart):
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)
self.k1.gradients_X(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
self.k2.gradients_X(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)

def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])

@@ -79,8 +79,8 @@ class prod_orthogonal(Kernpart):
self.k1.Kdiag(X[:,0:self.k1.input_dim],K1)
self.k2.Kdiag(X[:,self.k1.input_dim:],K2)

self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)
self.k1.gradients_X(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)

def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):

@@ -68,7 +68,7 @@ class RationalQuadratic(Kernpart):
target[0] += np.sum(dL_dKdiag)
# here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist2 = np.square((X-X.T)/self.lengthscale)

@@ -51,8 +51,11 @@ class RBF(Kernpart):
lengthscale = np.ones(self.input_dim)

self.variance = Param('variance', variance)

self.lengthscale = Param('lengthscale', lengthscale)
self.lengthscale.add_observer(self, self.update_lengthscale)
self.update_lengthscale(self.lengthscale)

self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache

@@ -114,7 +117,7 @@ class RBF(Kernpart):
self._K_computations(X, Z)
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)

else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)

@@ -123,7 +126,7 @@ class RBF(Kernpart):
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)

@@ -157,7 +160,7 @@ class RBF(Kernpart):
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)

@@ -168,8 +171,8 @@ class RBF(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)

def dKdiag_dX(self, dL_dKdiag, X, target):
pass

@@ -142,14 +142,14 @@ class RBFInv(RBF):
else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)

def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)

def dKdiag_dX(self, dL_dKdiag, X, target):
pass

@@ -88,7 +88,7 @@ class RBFCos(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag)

def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
#TODO!!!
raise NotImplementedError

@@ -144,8 +144,8 @@ class SS_RBF(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)

def dKdiag_dX(self, dL_dKdiag, X, target):
pass

@@ -54,7 +54,7 @@ class Symmetric(Kernpart):
self.k.dK_dtheta(dL_dK,AX,AX2,target)


def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform)
if X2 is None:

@@ -62,10 +62,10 @@ class Symmetric(Kernpart):
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.dK_dX(dL_dK, X, X2, target)
self.k.dK_dX(dL_dK, AX, X2, target)
self.k.dK_dX(dL_dK, X, AX2, target)
self.k.dK_dX(dL_dK, AX ,AX2, target)
self.k.gradients_X(dL_dK, X, X2, target)
self.k.gradients_X(dL_dK, AX, X2, target)
self.k.gradients_X(dL_dK, X, AX2, target)
self.k.gradients_X(dL_dK, AX ,AX2, target)

def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""

@@ -357,7 +357,7 @@ class spkern(Kernpart):
def dKdiag_dtheta(self,partial,X,target):
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)

def dK_dX(self,partial,X,Z,target):
def gradients_X(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
else:

@@ -57,4 +57,4 @@ class Kernel(Mapping):
return np.hstack((self._df_dA.flatten(), self._df_dbias))

def df_dX(self, dL_df, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
@@ -2,7 +2,6 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
import itertools
from gplvm import GPLVM
from .. import kern
from ..core import SparseGP

@@ -23,15 +22,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
:type init: 'PCA'|'random'

"""
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, name='bayesian gplvm', **kwargs):
if type(likelihood_or_Y) is np.ndarray:
likelihood = Gaussian(likelihood_or_Y)
else:
likelihood = likelihood_or_Y

def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=Gaussian(), name='bayesian gplvm', **kwargs):
if X == None:
X = self.initialise_latent(init, input_dim, likelihood.Y)
X = self.initialise_latent(init, input_dim, Y)
self.init = init

if X_variance is None:

@@ -44,9 +38,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
if kernel is None:
kernel = kern.rbf(input_dim) # + kern.white(input_dim)

SparseGP.__init__(self, X=X, likelihood=likelihood, kernel=kernel, Z=Z, X_variance=X_variance, name=name, **kwargs)
self.q = Normal(self.X, self.X_variance)
self.add_parameter(self.q, gradient=self._dbound_dmuS, index=0)
self.q = Normal(X, X_variance)
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
self.add_parameter(self.q, index=0)
self.ensure_default_constraints()

def _getstate(self):

@@ -94,9 +88,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
return dKL_dmu, dKL_dS

def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2

@@ -107,10 +101,25 @@ class BayesianGPLVM(SparseGP, GPLVM):
var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data

def log_likelihood(self):
ll = SparseGP.log_likelihood(self)
kl = self.KL_divergence()
return ll - kl
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
self._log_marginal_likelihood -= self.KL_divergence()

#The derivative of the bound wrt the inducing inputs Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)

dL_dmu, dL_dS = self.dL_dmuS()
dKL_dmu, dKL_dS = self.dKL_dmuS()
self.q.means.gradient = dL_dmu - dKL_dmu
self.q.variances.gradient = dL_dS - dKL_dS

# def log_likelihood(self):
# ll = SparseGP.log_likelihood(self)
# kl = self.KL_divergence()
# return ll - kl

def _dbound_dmuS(self):
dKL_dmu, dKL_dS = self.dKL_dmuS()

@@ -181,18 +190,18 @@ class BayesianGPLVM(SparseGP, GPLVM):
"""
dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]):
dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX

def dmu_dXnew(self, Xnew):
"""
Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
"""
dK_dX = np.zeros((Xnew.shape[0], self.num_inducing))
gradients_X = np.zeros((Xnew.shape[0], self.num_inducing))
ones = np.ones((1, 1))
for i in range(self.Z.shape[0]):
dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(dK_dX, self.Cpsi1Vf)
gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(gradients_X, self.Cpsi1Vf)

def plot_steepest_gradient_map(self, *args, ** kwargs):
"""
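For orientation, the new parameters_changed hook assembles the variational lower bound of the Bayesian GPLVM: one inference call returns the posterior, the marginal-likelihood term and the partial derivatives, the KL term of the variational distribution q(X) is subtracted, and gradients are written straight onto the parameter objects (Z, q.means, q.variances). In standard notation the quantity being maximised is, roughly,

\log p(Y) \ge \mathcal{L} = \mathbb{E}_{q(X)}\left[\log p(Y \mid X)\right] - \mathrm{KL}\left(q(X)\,\|\,p(X)\right), \qquad q(X) = \prod_n \mathcal{N}(x_n \mid \mu_n, S_n)

which is why the code reads `self._log_marginal_likelihood -= self.KL_divergence()` and splits every mean and variance gradient into a likelihood part (dL_dmu, dL_dS) minus a KL part (dKL_dmu, dKL_dS).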
@@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
GP._set_params(self, x[self.mapping.num_params:])

def _log_likelihood_gradients(self):
dL_df = self.kern.dK_dX(self.dL_dK, self.X)
dL_df = self.kern.gradients_X(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

@@ -60,7 +60,7 @@ class GPLVM(GP):
def jacobian(self,X):
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim):
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
target[:,:,i]=self.kern.gradients_X(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
return target

def magnification(self,X):

@@ -52,7 +52,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM):

def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z)
dL_dX += self.kern.gradients_X(self.dL_dpsi1, self.X, self.Z)

return dL_dX

@@ -1,4 +1,5 @@
import pylab as pb
import pylab as pb, numpy as np
from ...util.misc import param_to_array

def plot(parameterized, fignum=None, ax=None, colors=None):
"""
@@ -1,10 +1,9 @@
import numpy as np
from ..core.parameterization.array_core import ObservableArray
from ..core.parameterization.array_core import ObservableArray, ParamList
class Cacher(object):
def __init__(self, operation, limit=5):
self.limit = int(limit)
self.operation=operation
self.cached_inputs = []
self.cached_inputs = ParamList([])
self.cached_outputs = []
self.inputs_changed = []
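Using a ParamList for cached_inputs matters because a cache hit should mean "this exact array object was seen before": a plain list's `in` test would call numpy's element-wise __eq__ on ObservableArray inputs and either raise or match arrays that merely hold equal values. A hedged sketch of the kind of identity-based lookup this enables (function and variable names are illustrative, not GPy's):

cached_inputs, cached_outputs = [], []

def cached_call(operation, X):
    # identity-based hit test: the same object maps to the same cached result
    for i, el in enumerate(cached_inputs):
        if el is X:
            return cached_outputs[i]
    out = operation(X)
    cached_inputs.append(X)
    cached_outputs.append(out)
    return out

ParamList packages exactly this identity test behind the ordinary `in` operator, which is what the Cacher constructor above switches to.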