Merge branch 'params' of github.com:SheffieldML/GPy into params

Conflicts:
	GPy/kern/parts/prod.py
This commit is contained in:
Ricardo 2014-02-11 19:02:45 +00:00
commit 91d8890a13
76 changed files with 2023 additions and 1429 deletions

View file

@ -48,14 +48,13 @@ class GP(Model):
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = exact_gaussian_inference.ExactGaussianInference()
else:
inference_method = expectation_propagation
print "defaulting to ", inference_method, "for latent function inference"
else:
inference_method = expectation_propagation
print "defaulting to ", inference_method, "for latent function inference"
self.inference_method = inference_method
self.add_parameter(self.kern)
self.add_parameter(self.likelihood)
self.parameters_changed()
def parameters_changed(self):
@ -65,9 +64,6 @@ class GP(Model):
def log_likelihood(self):
return self._log_marginal_likelihood
def dL_dtheta_K(self):
return self.kern.dK_dtheta(self.posterior.dL_dK, self.X)
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
"""
Internal helper function for making predictions, does not account
@ -79,14 +75,17 @@ class GP(Model):
"""
Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
LiKx, _ = dtrtrs(self.posterior._woodbury_chol, np.asfortranarray(Kx), lower=1)
mu = np.dot(Kx.T, self.posterior._woodbury_vector)
#LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - tdot(LiKx.T)
#var = Kxx - tdot(LiKx.T)
var = np.dot(Kx.T, WiKx)
else:
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(LiKx*LiKx, 0)
#var = Kxx - np.sum(LiKx*LiKx, 0)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
return mu, var
@ -185,7 +184,7 @@ class GP(Model):
from ..plotting.matplot_dep import models_plots
models_plots.plot_fit_f(self,*args,**kwargs)
def plot(self, *args):
def plot(self, *args, **kwargs):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region
@ -204,7 +203,7 @@ class GP(Model):
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
models_plots.plot_fit(self,*args)
models_plots.plot_fit(self,*args,**kwargs)
def _getstate(self):
"""

View file

@ -170,15 +170,13 @@ class Model(Parameterized):
# first take care of all parameters (from N(0,1))
#x = self._get_params_transformed()
x = np.random.randn(self.size_transformed)
self._set_params_transformed(x)
x = self._untransform_params(x)
# now draw from prior where possible
x = self._get_params()
if self.priors is not None:
if self.priors is not None and len(self.priors):
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
self._set_params(x)
#self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
"""
Perform random restarts of the model, and set the model to the best
@ -260,19 +258,21 @@ class Model(Parameterized):
these terms are present in the name the parameter is
constrained positive.
"""
raise DeprecationWarning, 'parameters now have default constraints'
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
# param_names = self._get_param_names()
for s in positive_strings:
paramlist = self.grep_param_names(".*"+s)
for param in paramlist:
for p in param.flattened_parameters:
rav_i = set(self._raveled_index_for(p))
for constraint in self.constraints.iter_properties():
rav_i -= set(self._constraint_indices(p, constraint))
rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
if ind.size != 0:
p[np.unravel_index(ind, p.shape)].constrain_positive()
# for s in positive_strings:
# paramlist = self.grep_param_names(".*"+s)
# for param in paramlist:
# for p in param.flattened_parameters:
# rav_i = set(self._raveled_index_for(p))
# for constraint in self.constraints.iter_properties():
# rav_i -= set(self._constraint_indices(p, constraint))
# rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
# ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
# if ind.size != 0:
# p[np.unravel_index(ind, p.shape)].constrain_positive(warning=warning)
# if paramlist:
# self.__getitem__(None, paramlist).constrain_positive(warning=warning)
# currently_constrained = self.all_constrained_indices()
@ -325,7 +325,6 @@ class Model(Parameterized):
if self._fail_count >= self._allowed_failures:
raise e
self._fail_count += 1
import ipdb;ipdb.set_trace()
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
return obj_grads
@ -383,7 +382,7 @@ class Model(Parameterized):
sgd.run()
self.optimization_runs.append(sgd)
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
"""
Check the gradient of the ,odel by comparing to a numerical
estimate. If the verbose flag is passed, invividual
@ -408,18 +407,18 @@ class Model(Parameterized):
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
# evaulate around the point x
f1, g1 = self.objective_and_gradients(x + dx)
f2, g2 = self.objective_and_gradients(x - dx)
f1 = self.objective_function(x + dx)
f2 = self.objective_function(x - dx)
gradient = self.objective_function_gradients(x)
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient)))
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance)
else:
# check the gradient of each parameter individually, and do some pretty printing
try:
names = self._get_param_names_transformed()
names = self._get_param_names()
except NotImplementedError:
names = ['Variable %i' % i for i in range(len(x))]
# Prepare for pretty-printing
@ -433,39 +432,46 @@ class Model(Parameterized):
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
if target_param is None:
param_list = range(len(x))
else:
param_list = self.grep_param_names(target_param, transformed=True, search=True)
if not np.any(param_list):
param_list = self._raveled_index_for(target_param)
if self._has_fixes():
param_list = np.intersect1d(param_list, np.r_[:self.size][self._fixes_], True)
if param_list.size == 0:
print "No free parameters to check"
return
for i in param_list:
gradient = self.objective_function_gradients(x)
np.where(gradient==0, 1e-312, gradient)
ret = True
for i, ind in enumerate(param_list):
xx = x.copy()
xx[i] += step
f1, g1 = self.objective_and_gradients(xx)
xx[i] -= 2.*step
f2, g2 = self.objective_and_gradients(xx)
gradient = self.objective_function_gradients(x)[i]
xx[ind] += step
f1 = self.objective_function(xx)
xx[ind] -= 2.*step
f2 = self.objective_function(xx)
numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient))
difference = np.abs((f1 - f2) / 2 / step - gradient)
ratio = (f1 - f2) / (2 * step * gradient[ind])
difference = np.abs((f1 - f2) / 2 / step - gradient[ind])
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
formatted_name = "\033[92m {0} \033[0m".format(names[i])
formatted_name = "\033[92m {0} \033[0m".format(names[ind])
ret &= True
else:
formatted_name = "\033[91m {0} \033[0m".format(names[i])
formatted_name = "\033[91m {0} \033[0m".format(names[ind])
ret &= False
r = '%.6f' % float(ratio)
d = '%.6f' % float(difference)
g = '%.6f' % gradient
g = '%.6f' % gradient[ind]
ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string
return ret
def input_sensitivity(self):
"""
return an array describing the sesitivity of the model to each input

View file

@ -12,11 +12,22 @@ class ListArray(np.ndarray):
WARNING: This overrides the functionality of x==y!!!
Use numpy.equal(x,y) for element-wise equality testing.
"""
def __new__(cls, input_array):
obj = np.asanyarray(input_array).view(cls)
return obj
def __eq__(self, other):
return other is self
#def __eq__(self, other):
# return other is self
class ParamList(list):
def __contains__(self, other):
for el in self:
if el is other:
return True
return False
pass
class ObservableArray(ListArray, Observable):
"""
@ -25,8 +36,9 @@ class ObservableArray(ListArray, Observable):
will be called every time this array changes. The callable
takes exactly one argument, which is this array itself.
"""
__array_priority__ = 0 # Never give back Param
__array_priority__ = -1 # Never give back ObservableArray
def __new__(cls, input_array):
cls.__name__ = "ObservableArray\n "
obj = super(ObservableArray, cls).__new__(cls, input_array).view(cls)
obj._observers_ = {}
return obj
@ -34,18 +46,168 @@ class ObservableArray(ListArray, Observable):
# see InfoArray.__array_finalize__ for comments
if obj is None: return
self._observers_ = getattr(obj, '_observers_', None)
def __setitem__(self, s, val, update=True):
if self.ndim:
if not np.all(np.equal(self[s], val)):
super(ObservableArray, self).__setitem__(s, val)
if update:
self._notify_observers()
else:
if not np.all(np.equal(self, val)):
super(ObservableArray, self).__setitem__(Ellipsis, val)
if update:
self._notify_observers()
super(ObservableArray, self).__setitem__(s, val)
if update:
self._notify_observers()
def __getslice__(self, start, stop):
return self.__getitem__(slice(start, stop))
def __setslice__(self, start, stop, val):
return self.__setitem__(slice(start, stop), val)
return self.__setitem__(slice(start, stop), val)
def __copy__(self, *args):
return ObservableArray(self.base.base.copy(*args))
def copy(self, *args):
return self.__copy__(*args)
def __ror__(self, *args, **kwargs):
r = np.ndarray.__ror__(self, *args, **kwargs)
self._notify_observers()
return r
def __ilshift__(self, *args, **kwargs):
r = np.ndarray.__ilshift__(self, *args, **kwargs)
self._notify_observers()
return r
def __irshift__(self, *args, **kwargs):
r = np.ndarray.__irshift__(self, *args, **kwargs)
self._notify_observers()
return r
def __rrshift__(self, *args, **kwargs):
r = np.ndarray.__rrshift__(self, *args, **kwargs)
self._notify_observers()
return r
def __ixor__(self, *args, **kwargs):
r = np.ndarray.__ixor__(self, *args, **kwargs)
self._notify_observers()
return r
def __rxor__(self, *args, **kwargs):
r = np.ndarray.__rxor__(self, *args, **kwargs)
self._notify_observers()
return r
def __rdivmod__(self, *args, **kwargs):
r = np.ndarray.__rdivmod__(self, *args, **kwargs)
self._notify_observers()
return r
def __radd__(self, *args, **kwargs):
r = np.ndarray.__radd__(self, *args, **kwargs)
self._notify_observers()
return r
def __rdiv__(self, *args, **kwargs):
r = np.ndarray.__rdiv__(self, *args, **kwargs)
self._notify_observers()
return r
def __rtruediv__(self, *args, **kwargs):
r = np.ndarray.__rtruediv__(self, *args, **kwargs)
self._notify_observers()
return r
def __ipow__(self, *args, **kwargs):
r = np.ndarray.__ipow__(self, *args, **kwargs)
self._notify_observers()
return r
def __rmul__(self, *args, **kwargs):
r = np.ndarray.__rmul__(self, *args, **kwargs)
self._notify_observers()
return r
def __rpow__(self, *args, **kwargs):
r = np.ndarray.__rpow__(self, *args, **kwargs)
self._notify_observers()
return r
def __rsub__(self, *args, **kwargs):
r = np.ndarray.__rsub__(self, *args, **kwargs)
self._notify_observers()
return r
def __ifloordiv__(self, *args, **kwargs):
r = np.ndarray.__ifloordiv__(self, *args, **kwargs)
self._notify_observers()
return r
def __isub__(self, *args, **kwargs):
r = np.ndarray.__isub__(self, *args, **kwargs)
self._notify_observers()
return r
def __ior__(self, *args, **kwargs):
r = np.ndarray.__ior__(self, *args, **kwargs)
self._notify_observers()
return r
def __itruediv__(self, *args, **kwargs):
r = np.ndarray.__itruediv__(self, *args, **kwargs)
self._notify_observers()
return r
def __idiv__(self, *args, **kwargs):
r = np.ndarray.__idiv__(self, *args, **kwargs)
self._notify_observers()
return r
def __rfloordiv__(self, *args, **kwargs):
r = np.ndarray.__rfloordiv__(self, *args, **kwargs)
self._notify_observers()
return r
def __iand__(self, *args, **kwargs):
r = np.ndarray.__iand__(self, *args, **kwargs)
self._notify_observers()
return r
def __imod__(self, *args, **kwargs):
r = np.ndarray.__imod__(self, *args, **kwargs)
self._notify_observers()
return r
def __iadd__(self, *args, **kwargs):
r = np.ndarray.__iadd__(self, *args, **kwargs)
self._notify_observers()
return r
def __imul__(self, *args, **kwargs):
r = np.ndarray.__imul__(self, *args, **kwargs)
self._notify_observers()
return r
def __rshift__(self, *args, **kwargs):
r = np.ndarray.__rshift__(self, *args, **kwargs)
self._notify_observers()
return r

View file

@ -77,6 +77,12 @@ class ParameterIndexOperations(object):
def iter_properties(self):
return self._properties.iterkeys()
def shift(self, start, size):
for ind in self.iterindices():
toshift = ind>=start
if len(toshift) > 0:
ind[toshift] += size
def clear(self):
self._properties.clear()
@ -90,11 +96,6 @@ class ParameterIndexOperations(object):
return self._properties.values()
def properties_for(self, index):
# already_seen = dict()
# for ni in index:
# if ni not in already_seen:
# already_seen[ni] = [prop for prop in self.iter_properties() if ni in self._properties[prop]]
# yield already_seen[ni]
return vectorize(lambda i: [prop for prop in self.iter_properties() if i in self._properties[prop]], otypes=[list])(index)
def add(self, prop, indices):
@ -111,70 +112,11 @@ class ParameterIndexOperations(object):
self._properties[prop] = diff
else:
del self._properties[prop]
#[self._reverse[i].remove(prop) for i in removed if prop in self._reverse[i]]
return removed.astype(int)
# else:
# for a in self.properties():
# if numpy.all(a==prop) and a._parent_index_ == prop._parent_index_:
# ind = create_raveled_indices(indices, shape, offset)
# diff = remove_indices(self[a], ind)
# removed = numpy.intersect1d(self[a], ind, True)
# if not index_empty(diff):
# self._properties[a] = diff
# else:
# del self._properties[a]
# [self._reverse[i].remove(a) for i in removed if a in self._reverse[i]]
# return removed.astype(int)
return numpy.array([]).astype(int)
def __getitem__(self, prop):
return self._properties[prop]
# class TieIndexOperations(object):
# def __init__(self, params):
# self.params = params
# self.tied_from = ParameterIndexOperations()
# self.tied_to = ParameterIndexOperations()
# def add(self, tied_from, tied_to):
# rav_from = self.params._raveled_index_for(tied_from)
# rav_to = self.params._raveled_index_for(tied_to)
# self.tied_from.add(tied_to, rav_from)
# self.tied_to.add(tied_to, rav_to)
# return rav_from, rav_to
# def remove(self, tied_from, tied_to):
# rav_from = self.params._raveled_index_for(tied_from)
# rav_to = self.params._raveled_index_for(tied_to)
# rem_from = self.tied_from.remove(tied_to, rav_from)
# rem_to = self.tied_to.remove(tied_to, rav_to)
# left_from = self.tied_from._properties.pop(tied_to)
# left_to = self.tied_to._properties.pop(tied_to)
# self.tied_from[numpy.delete(tied_to, rem_from)] = left_from
# self.tied_to[numpy.delete(tied_to, rem_to)] = left_to
# return rav_from, rav_to
# def from_to_for(self, index):
# return self.tied_from.properties_for(index), self.tied_to.properties_for(index)
# def iter_from_to_indices(self):
# for k, f in self.tied_from.iteritems():
# yield f, self.tied_to[k]
# def iter_to_indices(self):
# return self.tied_to.iterindices()
# def iter_from_indices(self):
# return self.tied_from.iterindices()
# def iter_from_items(self):
# for f, i in self.tied_from.iteritems():
# yield f, i
# def iter_properties(self):
# return self.tied_from.iter_properties()
# def properties(self):
# return self.tied_from.properties()
# def from_to_indices(self, param):
# return self.tied_from[param], self.tied_to[param]
#
# # def create_raveled_indices(index, shape, offset=0):
# # if isinstance(index, (tuple, list)): i = [slice(None)] + list(index)
# # else: i = [slice(None), index]
# # ind = numpy.array(numpy.ravel_multi_index(numpy.indices(shape)[i], shape)).flat + numpy.int_(offset)
# # return ind
def combine_indices(arr1, arr2):
return numpy.union1d(arr1, arr2)

View file

@ -3,8 +3,8 @@
import itertools
import numpy
from parameter_core import Constrainable, adjust_name_for_printing
from array_core import ObservableArray
from parameter_core import Constrainable, Gradcheckable, adjust_name_for_printing
from array_core import ObservableArray, ParamList
###### printing
__constraints_name__ = "Constraint"
@ -12,47 +12,41 @@ __index_name__ = "Index"
__tie_name__ = "Tied to"
__precision__ = numpy.get_printoptions()['precision'] # numpy printing precision used, sublassing numpy ndarray after all
__print_threshold__ = 5
######
######
class Float(numpy.float64, Constrainable):
def __init__(self, f, base):
super(Float,self).__init__(f)
self._base = base
class Param(ObservableArray, Constrainable):
class Param(ObservableArray, Constrainable, Gradcheckable):
"""
Parameter object for GPy models.
:param name: name of the parameter to be printed
:param input_array: array which this parameter handles
:param str name: name of the parameter to be printed
:param input_array: array which this parameter handles
:type input_array: numpy.ndarray
:param default_constraint: The default constraint for this parameter
:type default_constraint:
You can add/remove constraints by calling constrain on the parameter itself, e.g:
- self[:,1].constrain_positive()
- self[0].tie_to(other)
- self.untie()
- self[:3,:].unconstrain()
- self[1].fix()
Fixing parameters will fix them to the value they are right now. If you change
the fixed value, it will be fixed to the new value!
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
This ndarray can be stored in lists and checked if it is in.
>>> import numpy as np
>>> x = np.random.normal(size=(10,3))
>>> x in [[1], x, [3]]
True
WARNING: This overrides the functionality of x==y!!!
Use numpy.equal(x,y) for element-wise equality testing.
"""
__array_priority__ = 0 # Never give back Param
__array_priority__ = -1 # Never give back Param
_fixes_ = None
def __new__(cls, name, input_array, *args, **kwargs):
def __new__(cls, name, input_array, default_constraint=None):
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
obj._current_slice_ = (slice(obj.shape[0]),)
obj._realshape_ = obj.shape
@ -66,16 +60,16 @@ class Param(ObservableArray, Constrainable):
obj.gradient = None
return obj
def __init__(self, name, input_array):
super(Param, self).__init__(name=name)
def __init__(self, name, input_array, default_constraint=None):
super(Param, self).__init__(name=name, default_constraint=default_constraint)
def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments
if obj is None: return
super(Param, self).__array_finalize__(obj)
self._direct_parent_ = getattr(obj, '_direct_parent_', None)
self._parent_index_ = getattr(obj, '_parent_index_', None)
self._highest_parent_ = getattr(obj, '_highest_parent_', None)
self._default_constraint_ = getattr(obj, '_default_constraint_', None)
self._current_slice_ = getattr(obj, '_current_slice_', None)
self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
self._tied_to_ = getattr(obj, '_tied_to_', None)
@ -86,7 +80,7 @@ class Param(ObservableArray, Constrainable):
self._original_ = getattr(obj, '_original_', None)
self._name = getattr(obj, 'name', None)
self.gradient = getattr(obj, 'gradient', None)
def __array_wrap__(self, out_arr, context=None):
return out_arr.view(numpy.ndarray)
#===========================================================================
@ -94,11 +88,11 @@ class Param(ObservableArray, Constrainable):
#===========================================================================
def __reduce_ex__(self):
func, args, state = super(Param, self).__reduce__()
return func, args, (state,
return func, args, (state,
(self.name,
self._direct_parent_,
self._parent_index_,
self._highest_parent_,
self._default_constraint_,
self._current_slice_,
self._realshape_,
self._realsize_,
@ -119,7 +113,7 @@ class Param(ObservableArray, Constrainable):
self._realsize_ = state.pop()
self._realshape_ = state.pop()
self._current_slice_ = state.pop()
self._highest_parent_ = state.pop()
self._default_constraint_ = state.pop()
self._parent_index_ = state.pop()
self._direct_parent_ = state.pop()
self.name = state.pop()
@ -132,13 +126,13 @@ class Param(ObservableArray, Constrainable):
self.flat = param
self._notify_tied_parameters()
self._notify_observers()
def _get_params(self):
return self.flat
# @property
# def name(self):
# """
# Name of this parameter.
# Name of this parameter.
# This can be a callable without parameters. The callable will be called
# every time the name property is accessed.
# """
@ -156,23 +150,6 @@ class Param(ObservableArray, Constrainable):
def _collect_gradient(self, target):
target[:] = self.gradient.flat
#===========================================================================
# Fixing Parameters:
#===========================================================================
def constrain_fixed(self, warning=True):
"""
Constrain this paramter to be fixed to the current value it carries.
:param warning: print a warning for overwriting constraints.
"""
self._highest_parent_._fix(self,warning)
fix = constrain_fixed
def unconstrain_fixed(self):
"""
This parameter will no longer be fixed.
"""
self._highest_parent_._unfix(self)
unfix = unconstrain_fixed
#===========================================================================
# Tying operations -> bugged, TODO
#===========================================================================
def tie_to(self, param):
@ -188,22 +165,22 @@ class Param(ObservableArray, Constrainable):
Note: For now only one parameter can have ties, so all of a parameter
will be removed, when re-tieing!
"""
#Note: this method will tie to the parameter which is the last in
#Note: this method will tie to the parameter which is the last in
# the chain of ties. Thus, if you tie to a tied parameter,
# this tie will be created to the parameter the param is tied
# to.
assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param,param.__class__)
assert isinstance(param, Param), "Argument {1} not of type {0}".format(Param, param.__class__)
param = numpy.atleast_1d(param)
if param.size != 1:
raise NotImplementedError, "Broadcast tying is not implemented yet"
try:
if self._original_:
if self._original_:
self[:] = param
else: # this happens when indexing created a copy of the array
self._direct_parent_._get_original(self)[self._current_slice_] = param
except ValueError:
raise ValueError("Trying to tie {} with shape {} to {} with shape {}".format(self.name, self.shape, param.name, param.shape))
raise ValueError("Trying to tie {} with shape {} to {} with shape {}".format(self.name, self.shape, param.name, param.shape))
if param is self:
raise RuntimeError, 'Cyclic tieing is not allowed'
# if len(param._tied_to_) > 0:
@ -246,7 +223,7 @@ class Param(ObservableArray, Constrainable):
t_rav_i = t._raveled_index()
tr_rav_i = tied_to_me._raveled_index()
new_index = list(set(t_rav_i) | set(tr_rav_i))
tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)]
tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
self._tied_to_me_[tmp] = self._tied_to_me_[t] | set(self._raveled_index())
del self._tied_to_me_[t]
return
@ -259,7 +236,7 @@ class Param(ObservableArray, Constrainable):
import ipdb;ipdb.set_trace()
new_index = list(set(t_rav_i) - set(tr_rav_i))
if new_index:
tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index,t._realshape_)]
tmp = t._direct_parent_._get_original(t)[numpy.unravel_index(new_index, t._realshape_)]
self._tied_to_me_[tmp] = self._tied_to_me_[t]
del self._tied_to_me_[t]
if len(self._tied_to_me_[tmp]) == 0:
@ -267,12 +244,12 @@ class Param(ObservableArray, Constrainable):
else:
del self._tied_to_me_[t]
def _on_tied_parameter_changed(self, val, ind):
if not self._updated_: #not fast_array_equal(self, val[ind]):
if not self._updated_: # not fast_array_equal(self, val[ind]):
val = numpy.atleast_1d(val)
self._updated_ = True
if self._original_:
self.__setitem__(slice(None), val[ind], update=False)
else: # this happens when indexing created a copy of the array
else: # this happens when indexing created a copy of the array
self._direct_parent_._get_original(self).__setitem__(self._current_slice_, val[ind], update=False)
self._notify_tied_parameters()
self._updated_ = False
@ -291,7 +268,7 @@ class Param(ObservableArray, Constrainable):
def unset_prior(self, *priors):
"""
:param priors: priors to remove from this parameter
Remove all priors from this parameter
"""
self._highest_parent_._remove_prior(self, *priors)
@ -301,11 +278,11 @@ class Param(ObservableArray, Constrainable):
def __getitem__(self, s, *args, **kwargs):
if not isinstance(s, tuple):
s = (s,)
if not reduce(lambda a,b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
s += (Ellipsis,)
new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
except AttributeError: pass# returning 0d array or float, double etc
except AttributeError: pass # returning 0d array or float, double etc
return new_arr
def __setitem__(self, s, val, update=True):
super(Param, self).__setitem__(s, val, update=update)
@ -322,12 +299,12 @@ class Param(ObservableArray, Constrainable):
if numpy.all(si == Ellipsis):
continue
if isinstance(si, slice):
a = si.indices(self._realshape_[i])[0]
a = si.indices(self._realshape_[i])[0]
elif isinstance(si, (list,numpy.ndarray,tuple)):
a = si[0]
else: a = si
if a<0:
a = self._realshape_[i]+a
if a < 0:
a = self._realshape_[i] + a
internal_offset += a * extended_realshape[i]
return internal_offset
def _raveled_index(self, slice_index=None):
@ -335,8 +312,8 @@ class Param(ObservableArray, Constrainable):
# of this object
extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
ind = self._indices(slice_index)
if ind.ndim < 2: ind=ind[:,None]
return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape*x), 1, ind), dtype=int)
if ind.ndim < 2: ind = ind[:, None]
return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
def _expand_index(self, slice_index=None):
# this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
# it basically translates slices to their respective index arrays and turns negative indices around
@ -349,11 +326,11 @@ class Param(ObservableArray, Constrainable):
if isinstance(a, slice):
start, stop, step = a.indices(b)
return numpy.r_[start:stop:step]
elif isinstance(a, (list,numpy.ndarray,tuple)):
elif isinstance(a, (list, numpy.ndarray, tuple)):
a = numpy.asarray(a, dtype=int)
a[a<0] = b + a[a<0]
elif a<0:
a = b+a
a[a < 0] = b + a[a < 0]
elif a < 0:
a = b + a
return numpy.r_[a]
return numpy.r_[:b]
return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))
@ -377,7 +354,7 @@ class Param(ObservableArray, Constrainable):
#===========================================================================
@property
def _description_str(self):
if self.size <= 1: return ["%f"%self]
if self.size <= 1: return ["%f" % self]
else: return [str(self.shape)]
def _parameter_names(self, add_name):
return [self.name]
@ -389,31 +366,31 @@ class Param(ObservableArray, Constrainable):
return [self.shape]
@property
def _constraints_str(self):
return [' '.join(map(lambda c: str(c[0]) if c[1].size==self._realsize_ else "{"+str(c[0])+"}", self._highest_parent_._constraints_iter_items(self)))]
return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self._highest_parent_._constraints_iter_items(self)))]
@property
def _ties_str(self):
return [t._short() for t in self._tied_to_] or ['']
@property
def name_hirarchical(self):
if self.has_parent():
return self._direct_parent_.hirarchy_name()+adjust_name_for_printing(self.name)
return self._direct_parent_.hirarchy_name() + adjust_name_for_printing(self.name)
return adjust_name_for_printing(self.name)
def __repr__(self, *args, **kwargs):
name = "\033[1m{x:s}\033[0;0m:\n".format(
x=self.name_hirarchical)
return name + super(Param, self).__repr__(*args,**kwargs)
return name + super(Param, self).__repr__(*args, **kwargs)
def _ties_for(self, rav_index):
#size = sum(p.size for p in self._tied_to_)
# size = sum(p.size for p in self._tied_to_)
ties = numpy.empty(shape=(len(self._tied_to_), numpy.size(rav_index)), dtype=Param)
for i, tied_to in enumerate(self._tied_to_):
for t, ind in tied_to._tied_to_me_.iteritems():
if t._parent_index_ == self._parent_index_:
matches = numpy.where(rav_index[:,None] == t._raveled_index()[None, :])
matches = numpy.where(rav_index[:, None] == t._raveled_index()[None, :])
tt_rav_index = tied_to._raveled_index()
ind_rav_matches = numpy.where(tt_rav_index == numpy.array(list(ind)))[0]
if len(ind) != 1: ties[i, matches[0][ind_rav_matches]] = numpy.take(tt_rav_index, matches[1], mode='wrap')[ind_rav_matches]
else: ties[i, matches[0]] = numpy.take(tt_rav_index, matches[1], mode='wrap')
return map(lambda a: sum(a,[]), zip(*[[[tie.flatten()] if tx!=None else [] for tx in t] for t,tie in zip(ties,self._tied_to_)]))
return map(lambda a: sum(a, []), zip(*[[[tie.flatten()] if tx != None else [] for tx in t] for t, tie in zip(ties, self._tied_to_)]))
def _constraints_for(self, rav_index):
return self._highest_parent_._constraints_for(self, rav_index)
def _indices(self, slice_index=None):
@ -422,13 +399,13 @@ class Param(ObservableArray, Constrainable):
slice_index = self._current_slice_
if isinstance(slice_index, (tuple, list)):
clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
and len(set(map(len,clean_curr_slice))) <= 1):
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
and len(set(map(len, clean_curr_slice))) <= 1):
return numpy.fromiter(itertools.izip(*clean_curr_slice),
dtype=[('',int)]*self._realndim_,count=len(clean_curr_slice[0])).view((int, self._realndim_))
dtype=[('', int)] * self._realndim_, count=len(clean_curr_slice[0])).view((int, self._realndim_))
expanded_index = list(self._expand_index(slice_index))
return numpy.fromiter(itertools.product(*expanded_index),
dtype=[('',int)]*self._realndim_,count=reduce(lambda a,b: a*b.size,expanded_index,1)).view((int, self._realndim_))
dtype=[('', int)] * self._realndim_, count=reduce(lambda a, b: a * b.size, expanded_index, 1)).view((int, self._realndim_))
def _max_len_names(self, gen, header):
return reduce(lambda a, b:max(a, len(b)), gen, len(header))
def _max_len_values(self):
@ -441,9 +418,9 @@ class Param(ObservableArray, Constrainable):
if self._realsize_ < 2:
return name
ind = self._indices()
if ind.size > 4: indstr = ','.join(map(str,ind[:2])) + "..." + ','.join(map(str,ind[-2:]))
else: indstr = ','.join(map(str,ind))
return name+'['+indstr+']'
if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
else: indstr = ','.join(map(str, ind))
return name + '[' + indstr + ']'
def __str__(self, constr_matrix=None, indices=None, ties=None, lc=None, lx=None, li=None, lt=None):
filter_ = self._current_slice_
vals = self.flat
@ -456,10 +433,10 @@ class Param(ObservableArray, Constrainable):
if lx is None: lx = self._max_len_values()
if li is None: li = self._max_len_index(indices)
if lt is None: lt = self._max_len_names(ties, __tie_name__)
header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc,lx,li,lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing
header = " {i:^{2}s} | \033[1m{x:^{1}s}\033[0;0m | {c:^{0}s} | {t:^{3}s}".format(lc, lx, li, lt, x=self.name_hirarchical, c=__constraints_name__, i=__index_name__, t=__tie_name__) # nice header for printing
if not ties: ties = itertools.cycle([''])
return "\n".join([header]+[" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc,lx,__precision__,li,lt, x=x, c=" ".join(map(str,c)), t=(t or ''), i=i) for i,x,c,t in itertools.izip(indices,vals,constr_matrix,ties)]) # return all the constraints with right indices
#except: return super(Param, self).__str__()
return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, x=x, c=" ".join(map(str, c)), t=(t or ''), i=i) for i, x, c, t in itertools.izip(indices, vals, constr_matrix, ties)]) # return all the constraints with right indices
# except: return super(Param, self).__str__()
class ParamConcatenation(object):
def __init__(self, params):
@ -470,12 +447,12 @@ class ParamConcatenation(object):
See :py:class:`GPy.core.parameter.Param` for more details on constraining.
"""
#self.params = params
self.params = []
# self.params = params
self.params = ParamList([])
for p in params:
for p in p.flattened_parameters:
if p not in self.params:
self.params.append(p)
self.params.append(p)
self._param_sizes = [p.size for p in self.params]
startstops = numpy.cumsum([0] + self._param_sizes)
self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])]
@ -483,15 +460,15 @@ class ParamConcatenation(object):
# Get/set items, enable broadcasting
#===========================================================================
def __getitem__(self, s):
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
params = [p._get_params()[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p._get_params()[ind[ps]])]
if len(params)==1: return params[0]
return ParamConcatenation(params)
def __setitem__(self, s, val, update=True):
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
vals = self._vals(); vals[s] = val; del val
[numpy.place(p, ind[ps], vals[ps]) and p._notify_tied_parameters()
for p, ps in zip(self.params, self._param_slices_)]
[numpy.place(p, ind[ps], vals[ps]) and p._notify_tied_parameters()
for p, ps in zip(self.params, self._param_slices_)]
if update:
self.params[0]._highest_parent_.parameters_changed()
def _vals(self):
@ -499,46 +476,68 @@ class ParamConcatenation(object):
#===========================================================================
# parameter operations:
#===========================================================================
def update_all_params(self):
self.params[0]._highest_parent_.parameters_changed()
def constrain(self, constraint, warning=True):
[param.constrain(constraint) for param in self.params]
[param.constrain(constraint, update=False) for param in self.params]
self.update_all_params()
constrain.__doc__ = Param.constrain.__doc__
def constrain_positive(self, warning=True):
[param.constrain_positive(warning) for param in self.params]
[param.constrain_positive(warning, update=False) for param in self.params]
self.update_all_params()
constrain_positive.__doc__ = Param.constrain_positive.__doc__
def constrain_fixed(self, warning=True):
[param.constrain_fixed(warning) for param in self.params]
constrain_fixed.__doc__ = Param.constrain_fixed.__doc__
fix = constrain_fixed
def constrain_negative(self, warning=True):
[param.constrain_negative(warning) for param in self.params]
[param.constrain_negative(warning, update=False) for param in self.params]
self.update_all_params()
constrain_negative.__doc__ = Param.constrain_negative.__doc__
def constrain_bounded(self, lower, upper, warning=True):
[param.constrain_bounded(lower, upper, warning) for param in self.params]
[param.constrain_bounded(lower, upper, warning, update=False) for param in self.params]
self.update_all_params()
constrain_bounded.__doc__ = Param.constrain_bounded.__doc__
def unconstrain(self, *constraints):
[param.unconstrain(*constraints) for param in self.params]
unconstrain.__doc__ = Param.unconstrain.__doc__
def unconstrain_negative(self):
[param.unconstrain_negative() for param in self.params]
unconstrain_negative.__doc__ = Param.unconstrain_negative.__doc__
def unconstrain_positive(self):
[param.unconstrain_positive() for param in self.params]
unconstrain_positive.__doc__ = Param.unconstrain_positive.__doc__
def unconstrain_fixed(self):
[param.unconstrain_fixed() for param in self.params]
unconstrain_fixed.__doc__ = Param.unconstrain_fixed.__doc__
unfix = unconstrain_fixed
def unconstrain_bounded(self, lower, upper):
[param.unconstrain_bounded(lower, upper) for param in self.params]
unconstrain_bounded.__doc__ = Param.unconstrain_bounded.__doc__
def untie(self, *ties):
[param.untie(*ties) for param in self.params]
__lt__ = lambda self, val: self._vals()<val
__le__ = lambda self, val: self._vals()<=val
__eq__ = lambda self, val: self._vals()==val
__ne__ = lambda self, val: self._vals()!=val
__gt__ = lambda self, val: self._vals()>val
__ge__ = lambda self, val: self._vals()>=val
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
return self.params[0]._highest_parent_._checkgrad(self, verbose, step, tolerance)
#checkgrad.__doc__ = Gradcheckable.checkgrad.__doc__
__lt__ = lambda self, val: self._vals() < val
__le__ = lambda self, val: self._vals() <= val
__eq__ = lambda self, val: self._vals() == val
__ne__ = lambda self, val: self._vals() != val
__gt__ = lambda self, val: self._vals() > val
__ge__ = lambda self, val: self._vals() >= val
def __str__(self, *args, **kwargs):
def f(p):
ind = p._raveled_index()
@ -555,9 +554,9 @@ class ParamConcatenation(object):
return "\n{}\n".format(" -"+"- | -".join(['-'*l for l in [li,lx,lc,lt]])).join(strings)
def __repr__(self):
return "\n".join(map(repr,self.params))
if __name__ == '__main__':
from GPy.core.parameterized import Parameterized
from GPy.core.parameter import Param
@ -567,17 +566,17 @@ if __name__ == '__main__':
print "random done"
p = Param("q_mean", X)
p1 = Param("q_variance", numpy.random.rand(*p.shape))
p2 = Param("Y", numpy.random.randn(p.shape[0],1))
p2 = Param("Y", numpy.random.randn(p.shape[0], 1))
p3 = Param("variance", numpy.random.rand())
p4 = Param("lengthscale", numpy.random.rand(2))
m = Parameterized()
rbf = Parameterized(name='rbf')
rbf.add_parameter(p3,p4)
m.add_parameter(p,p1,rbf)
print "setting params"
#print m.q_v[3:5,[1,4,5]]
print "constraining variance"

View file

@ -14,12 +14,13 @@ class Observable(object):
_observers_ = {}
def add_observer(self, observer, callble):
self._observers_[observer] = callble
callble(self)
#callble(self)
def remove_observer(self, observer):
del self._observers_[observer]
def _notify_observers(self):
[callble(self) for callble in self._observers_.itervalues()]
class Pickleable(object):
def _getstate(self):
"""
@ -35,9 +36,9 @@ class Pickleable(object):
Set the state (memento pattern) of this class to the given state.
Usually this is just the counterpart to _getstate, such that
an object is a copy of another when calling
copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())
See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
"""
raise NotImplementedError, "To be able to use pickling you need to implement this method"
@ -47,19 +48,24 @@ class Pickleable(object):
#===============================================================================
class Parentable(object):
def __init__(self, direct_parent=None, highest_parent=None, parent_index=None):
super(Parentable,self).__init__()
def __init__(self, direct_parent=None, parent_index=None):
super(Parentable,self).__init__()
self._direct_parent_ = direct_parent
self._parent_index_ = parent_index
self._highest_parent_ = highest_parent
def has_parent(self):
return self._direct_parent_ is not None and self._highest_parent_ is not None
return self._direct_parent_ is not None
@property
def _highest_parent_(self):
if self._direct_parent_ is None:
return self
return self._direct_parent_._highest_parent_
class Nameable(Parentable):
_name = None
def __init__(self, name, direct_parent=None, highest_parent=None, parent_index=None):
super(Nameable,self).__init__(direct_parent, highest_parent, parent_index)
def __init__(self, name, direct_parent=None, parent_index=None):
super(Nameable,self).__init__(direct_parent, parent_index)
self._name = name or self.__class__.__name__
@property
@ -68,13 +74,45 @@ class Nameable(Parentable):
@name.setter
def name(self, name):
from_name = self.name
self._name = name
self._name = name
if self.has_parent():
self._direct_parent_._name_changed(self, from_name)
self._direct_parent_._name_changed(self, from_name)
class Gradcheckable(Parentable):
#===========================================================================
# Gradchecking
#===========================================================================
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
if self.has_parent():
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
def _checkgrad(self, param):
raise NotImplementedError, "Need log likelihood to check gradient against"
class Constrainable(Nameable):
def __init__(self, name):
def __init__(self, name, default_constraint=None):
super(Constrainable,self).__init__(name)
self._default_constraint_ = default_constraint
#===========================================================================
# Fixing Parameters:
#===========================================================================
def constrain_fixed(self, value=None, warning=True):
"""
Constrain this paramter to be fixed to the current value it carries.
:param warning: print a warning for overwriting constraints.
"""
if value is not None:
self[:] = value
self._highest_parent_._fix(self,warning)
fix = constrain_fixed
def unconstrain_fixed(self):
"""
This parameter will no longer be fixed.
"""
self._highest_parent_._unfix(self)
unfix = unconstrain_fixed
#===========================================================================
# Constrain operations -> done
#===========================================================================
@ -83,7 +121,7 @@ class Constrainable(Nameable):
:param transform: the :py:class:`GPy.core.transformations.Transformation`
to constrain the this parameter to.
:param warning: print a warning if re-constraining parameters.
Constrain the parameter to the given
:py:class:`GPy.core.transformations.Transformation`.
"""
@ -96,37 +134,37 @@ class Constrainable(Nameable):
self._add_constrain(p, transform, warning)
if update:
self.parameters_changed()
def constrain_positive(self, warning=True):
def constrain_positive(self, warning=True, update=True):
"""
:param warning: print a warning if re-constraining parameters.
Constrain this parameter to the default positive constraint.
"""
self.constrain(Logexp(), warning)
self.constrain(Logexp(), warning=warning, update=update)
def constrain_negative(self, warning=True):
def constrain_negative(self, warning=True, update=True):
"""
:param warning: print a warning if re-constraining parameters.
Constrain this parameter to the default negative constraint.
"""
self.constrain(NegativeLogexp(), warning)
self.constrain(NegativeLogexp(), warning=warning, update=update)
def constrain_bounded(self, lower, upper, warning=True):
def constrain_bounded(self, lower, upper, warning=True, update=True):
"""
:param lower, upper: the limits to bound this parameter to
:param warning: print a warning if re-constraining parameters.
Constrain this parameter to lie within the given range.
"""
self.constrain(Logistic(lower, upper), warning)
self.constrain(Logistic(lower, upper), warning=warning, update=update)
def unconstrain(self, *transforms):
"""
:param transforms: The transformations to unconstrain from.
remove all :py:class:`GPy.core.transformations.Transformation`
remove all :py:class:`GPy.core.transformations.Transformation`
transformats of this parameter object.
"""
if self.has_parent():
@ -137,20 +175,20 @@ class Constrainable(Nameable):
def unconstrain_positive(self):
"""
Remove positive constraint of this parameter.
Remove positive constraint of this parameter.
"""
self.unconstrain(Logexp())
def unconstrain_negative(self):
"""
Remove negative constraint of this parameter.
Remove negative constraint of this parameter.
"""
self.unconstrain(NegativeLogexp())
def unconstrain_bounded(self, lower, upper):
"""
:param lower, upper: the limits to unbound this parameter from
Remove (lower, upper) bounded constrain from this parameter/
"""
self.unconstrain(Logistic(lower, upper))

View file

@ -8,9 +8,10 @@ import cPickle
import itertools
from re import compile, _pattern_type
from param import ParamConcatenation, Param
from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing
from parameter_core import Constrainable, Pickleable, Observable, adjust_name_for_printing, Gradcheckable
from index_operations import ParameterIndexOperations,\
index_empty
from array_core import ParamList
#===============================================================================
# Printing:
@ -23,7 +24,7 @@ FIXED = False
UNFIXED = True
#===============================================================================
class Parameterized(Constrainable, Pickleable, Observable):
class Parameterized(Constrainable, Pickleable, Observable, Gradcheckable):
"""
Parameterized class
@ -69,11 +70,11 @@ class Parameterized(Constrainable, Pickleable, Observable):
super(Parameterized, self).__init__(name=name)
self._in_init_ = True
self._constraints_ = None#ParameterIndexOperations()
if not hasattr(self, "_parameters_"):
self._parameters_ = []
self._parameters_ = ParamList()
self.size = sum(p.size for p in self._parameters_)
if not self._has_fixes():
self._fixes_ = None
self._param_slices_ = []
self._connect_parameters()
self._added_names_ = set()
del self._in_init_
@ -153,6 +154,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
elif param._has_fixes(): self._fixes_ = np.r_[np.ones(self.size, dtype=bool), fixes_param]
else:
start = sum(p.size for p in self._parameters_[:index])
self.constraints.shift(start, param.size)
self._parameters_.insert(index, param)
# make sure fixes and constraints are indexed right
@ -164,12 +167,17 @@ class Parameterized(Constrainable, Pickleable, Observable):
self._fixes_ = np.ones(self.size+param.size, dtype=bool)
self._fixes_[ins:ins+param.size] = fixes_param
self.size += param.size
else:
raise RuntimeError, """Parameter exists already added and no copy made"""
self._connect_parameters()
# make sure the constraints are pulled over:
if hasattr(param, "_constraints_") and param._constraints_ is not None:
for t, ind in param._constraints_.iteritems():
self.constraints.add(t, ind+self._offset_for(param))
param._constraints_.clear()
if param._default_constraint_ is not None:
self._add_constrain(param, param._default_constraint_, False)
if self._has_fixes() and np.all(self._fixes_): # ==UNFIXED
self._fixes_= None
@ -188,10 +196,10 @@ class Parameterized(Constrainable, Pickleable, Observable):
note: if it is a string object it will not (!) be regexp-matched
automatically.
"""
self._parameters_ = [p for p in self._parameters_
self._parameters_ = ParamList([p for p in self._parameters_
if not (p._parent_index_ in names_params_indices
or p.name in names_params_indices
or p in names_params_indices)]
or p in names_params_indices)])
self._connect_parameters()
def parameters_changed(self):
@ -211,18 +219,11 @@ class Parameterized(Constrainable, Pickleable, Observable):
if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
# no parameters for this class
return
i = 0
sizes = [0]
self._param_slices_ = []
for p in self._parameters_:
for i,p in enumerate(self._parameters_):
p._direct_parent_ = self
p._highest_parent_ = self
p._parent_index_ = i
i += 1
for pi in p.flattened_parameters:
pi._highest_parent_ = self
for pi in p._parameters_:
pi._highest_parent_ = self
not_unique = []
sizes.append(p.size+sizes[-1])
self._param_slices_.append(slice(sizes[-2], sizes[-1]))
@ -254,6 +255,16 @@ class Parameterized(Constrainable, Pickleable, Observable):
cPickle.dump(self, f, protocol)
def copy(self):
"""Returns a (deep) copy of the current model """
#dc = dict()
#for k, v in self.__dict__.iteritems():
#if k not in ['_highest_parent_', '_direct_parent_']:
#dc[k] = copy.deepcopy(v)
#dc = copy.deepcopy(self.__dict__)
#dc['_highest_parent_'] = None
#dc['_direct_parent_'] = None
#s = self.__class__.new()
#s.__dict__ = dc
return copy.deepcopy(self)
def __getstate__(self):
if self._has_get_set_state():
@ -308,14 +319,20 @@ class Parameterized(Constrainable, Pickleable, Observable):
#===========================================================================
# Optimization handles:
#===========================================================================
def _get_param_names_transformed(self):
def _get_param_names(self):
n = numpy.array([p.name_hirarchical+'['+str(i)+']' for p in self.flattened_parameters for i in p._indices()])
return n
def _get_param_names_transformed(self):
n = self._get_param_names()
if self._has_fixes():
return n[self._fixes_]
return n
def _get_params(self):
# don't overwrite this anymore!
return numpy.hstack([x._get_params() for x in self._parameters_])
if not self.size:
return np.empty(shape=(0,), dtype=np.float64)
return numpy.hstack([x._get_params() for x in self._parameters_ if x.size>0])
def _set_params(self, params, update=True):
# don't overwrite this anymore!
[p._set_params(params[s], update=update) for p,s in itertools.izip(self._parameters_,self._param_slices_)]
@ -329,10 +346,12 @@ class Parameterized(Constrainable, Pickleable, Observable):
return p
def _set_params_transformed(self, p):
# inverse apply transformations for parameters and set the resulting parameters
self._set_params(self._untransform_params(p))
def _untransform_params(self, p):
p = p.copy()
if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
[numpy.put(p, ind, c.f(p[ind])) for c,ind in self.constraints.iteritems() if c != __fixed__]
self._set_params(p)
return p
def _name_changed(self, param, old_name):
if hasattr(self, old_name) and old_name in self._added_names_:
delattr(self, old_name)
@ -365,6 +384,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
that is an int array, containing the indexes for the flattened
param inside this parameterized logic.
"""
if isinstance(param, ParamConcatenation):
return numpy.hstack((self._raveled_index_for(p) for p in param.params))
return param._raveled_index() + self._offset_for(param)
def _raveled_index(self):
@ -374,7 +395,7 @@ class Parameterized(Constrainable, Pickleable, Observable):
"""
return numpy.r_[:self.size]
#===========================================================================
# Handle ties:
# Fixing parameters:
#===========================================================================
def _set_fixed(self, param_or_index):
if not self._has_fixes(): self._fixes_ = numpy.ones(self.size, dtype=bool)
@ -399,9 +420,6 @@ class Parameterized(Constrainable, Pickleable, Observable):
if self._has_fixes():
return self._fixes_[self._raveled_index_for(param)]
return numpy.ones(self.size, dtype=bool)[self._raveled_index_for(param)]
#===========================================================================
# Fixing parameters:
#===========================================================================
def _fix(self, param, warning=True):
f = self._add_constrain(param, __fixed__, warning)
self._set_fixed(f)
@ -412,6 +430,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
#===========================================================================
# Convenience for fixed, tied checking of param:
#===========================================================================
def fixed_indices(self):
return np.array([x.is_fixed for x in self._parameters_])
def _is_fixed(self, param):
# returns if the whole param is fixed
if not self._has_fixes():
@ -441,7 +461,8 @@ class Parameterized(Constrainable, Pickleable, Observable):
# if removing constraints before adding new is not wanted, just delete the above line!
self.constraints.add(transform, rav_i)
param = self._get_original(param)
param._set_params(transform.initialize(param._get_params()))
if not (transform == __fixed__):
param._set_params(transform.initialize(param._get_params()), update=False)
if warning and any(reconstrained):
# if you want to print the whole params object, which was reconstrained use:
# m = str(param[self._backtranslate_index(param, reconstrained)])

View file

@ -4,9 +4,9 @@
import numpy as np
from domains import _POSITIVE,_NEGATIVE, _BOUNDED
import sys
import sys
import weakref
_lim_val = -np.log(sys.float_info.epsilon)
_lim_val = -np.log(sys.float_info.epsilon)
class Transformation(object):
domain = None
@ -94,7 +94,7 @@ class LogexpClipped(Logexp):
def __str__(self):
return '+ve_c'
class Exponent(Transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
domain = _POSITIVE
@ -162,9 +162,11 @@ class Logistic(Transformation):
def initialize(self, f):
if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints"
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
#return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
#FIXME: Max, zeros_like right?
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)
def __str__(self):
return '{},{}'.format(self.lower, self.upper)

View file

@ -3,10 +3,9 @@ Created on 6 Nov 2013
@author: maxz
'''
import numpy as np
from parameterized import Parameterized
from param import Param
from ...util.misc import param_to_array
from transformations import Logexp
class Normal(Parameterized):
'''
@ -16,9 +15,9 @@ class Normal(Parameterized):
'''
def __init__(self, means, variances, name='latent space'):
Parameterized.__init__(self, name=name)
self.means = Param("mean", means)
self.variances = Param('variance', variances)
self.add_parameters(self.means, self.variances)
self.mean = Param("mean", means)
self.variance = Param('variance', variances, Logexp())
self.add_parameters(self.mean, self.variance)
def plot(self, *args):
"""
@ -26,6 +25,7 @@ class Normal(Parameterized):
See GPy.plotting.matplot_dep.variational_plots
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import variational_plots
from ...plotting.matplot_dep import variational_plots
return variational_plots.plot(self,*args)

View file

@ -6,6 +6,8 @@ from ..util.linalg import mdot, tdot, symmetrify, backsub_both_sides, chol_inv,
from gp import GP
from parameterization.param import Param
from ..inference.latent_function_inference import varDTC
from .. import likelihoods
from GPy.util.misc import param_to_array
class SparseGP(GP):
"""
@ -35,46 +37,58 @@ class SparseGP(GP):
#pick a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = varDTC.Gaussian_inference()
inference_method = varDTC.VarDTC()
else:
#inference_method = ??
raise NotImplementedError, "what to do what to do?"
print "defaulting to ", inference_method, "for latent function inference"
GP.__init__(self, X, Y, likelihood, inference_method, kernel, name)
self.Z = Z
self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0]
if X_variance is None:
self.has_uncertain_inputs = False
self.X_variance = None
else:
if not (X_variance is None):
assert X_variance.shape == X.shape
self.has_uncertain_inputs = True
self.X_variance = X_variance
self.X_variance = X_variance
self.Z = Param('inducing inputs', self.Z)
self.add_parameter(self.Z, gradient=self.dL_dZ, index=0)
self.add_parameter(self.kern, gradient=self.dL_dtheta)
self.add_parameter(self.likelihood, gradient=lambda:self.likelihood._gradients(partial=self.partial_for_likelihood))
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name)
self.add_parameter(self.Z, index=0)
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
#The derivative of the bound wrt the inducing inputs Z
self.Z.gradient = self.kern.dK_dX(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
self.Z.gradient += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
self.Z.gradient += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
#The derivative of the bound wrt the inducing inputs Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
if self.X_variance is None:
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
else:
self.Z.gradient += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""
Make a prediction for the latent function values
"""
#TODO!!!
if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts)
var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx) # NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)
var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
else:
# assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
mu = np.dot(Kx, self.Cpsi1V)
if full_cov:
raise NotImplementedError, "TODO"
else:
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var[:,None]
def _getstate(self):

View file

@ -154,13 +154,13 @@ class SVIGP(GP):
self.psi2 = None
def dL_dtheta(self):
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
dL_dtheta = self.kern._param_grad_helper(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z)
dL_dtheta += self.kern._param_grad_helper(self.dL_dpsi1, self.X_batch, self.Z)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
return dL_dtheta

View file

@ -3,7 +3,7 @@
import numpy as _np
default_seed = _np.random.seed(123344)
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False, output_dim=1e4):
"""
model for testing purposes. Samples from a GP with rbf kernel and learns
the samples with a new kernel. Normally not for optimization, just model cheking
@ -15,54 +15,54 @@ def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False):
num_inducing = 5
if plot:
output_dim = 1
input_dim = 2
input_dim = 3
else:
input_dim = 2
output_dim = 25
input_dim = 1
output_dim = output_dim
# generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim)
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
+ GPy.kern.white(input_dim, 0.01))
#+ GPy.kern.white(input_dim, 0.01)
)
K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T
lik = Gaussian(Y, normalize=True)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
#===========================================================================
# randomly obstruct data with percentage p
p = .8
Y_obstruct = Y.copy()
Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan
#===========================================================================
m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
#m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales
if plot:
import matplotlib.pyplot as pb
m.plot()
pb.title('PCA initialisation')
m2.plot()
pb.title('PCA initialisation')
#m2.plot()
#pb.title('PCA initialisation')
if optimize:
m.optimize('scg', messages=verbose)
m2.optimize('scg', messages=verbose)
#m2.optimize('scg', messages=verbose)
if plot:
m.plot()
pb.title('After optimisation')
m2.plot()
pb.title('After optimisation')
#m2.plot()
#pb.title('After optimisation')
return m, m2
return m
def gplvm_oil_100(optimize=True, verbose=1, plot=True):
import GPy
@ -264,16 +264,15 @@ def bgplvm_simulation(optimize=True, verbose=1,
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
k = kern.linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
m['noise'] = Y.var() / 100.
if optimize:
print "Optimizing model:"
m.optimize('scg', messages=verbose, max_iters=max_iters,
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05)
if plot:
m.plot_X_1d("BGPLVM Latent Space 1D")
m.q.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m

View file

@ -37,39 +37,43 @@ def student_t_approx(optimize=True, plot=True):
# Kernel object
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel2 = kernel1.copy()
kernel3 = kernel1.copy()
kernel4 = kernel1.copy()
kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel3 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel4 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
#Gaussian GP model on clean data
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
# optimize
m1.ensure_default_constraints()
m1.constrain_fixed('white', 1e-5)
m1['white'] = 1e-5
m1['white'].constrain_fixed('white')
m1.randomize()
#Gaussian GP model on corrupt data
m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
m2.ensure_default_constraints()
m2.constrain_fixed('white', 1e-5)
m1['white'] = 1e-5
m1['white'].constrain_fixed('white')
m2.randomize()
#Student t GP model on clean data
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution)
m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood)
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf)
m3.ensure_default_constraints()
m3.constrain_bounded('t_noise', 1e-6, 10.)
m3.constrain_fixed('white', 1e-5)
m3['t_noise'].constrain_bounded(1e-6, 10.)
m3['white'] = 1e-5
m3['white'].constrain_fixed()
m3.randomize()
#Student t GP model on corrupt data
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd)
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution)
m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood)
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf)
m4.ensure_default_constraints()
m4.constrain_bounded('t_noise', 1e-6, 10.)
m4.constrain_fixed('white', 1e-5)
m4['t_noise'].constrain_bounded(1e-6, 10.)
m4['white'] = 1e-5
m4['white'].constrain_fixed()
m4.randomize()
if optimize:

View file

@ -281,11 +281,12 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
noise_model = GPy.likelihoods.poisson()
likelihood = GPy.likelihoods.Laplace(Y,noise_model)
kern = GPy.kern.rbf(1)
poisson_lik = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
# create simple GP Model
m = GPy.models.GPRegression(X, Y, likelihood=likelihood)
m = GPy.core.GP(X, Y, kernel=kern, likelihood=poisson_lik, inference_method=laplace_inf)
if optimize:
m.optimize(optimizer)
@ -472,9 +473,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
Z = np.random.uniform(-3., 3., (7, 1))
k = GPy.kern.rbf(1)
import ipdb;ipdb.set_trace()
# create simple GP Model - no input uncertainty on this one
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
if optimize:
m.optimize('scg', messages=1, max_iters=max_iters)
@ -485,7 +486,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
print m
# the same Model with uncertainty
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S)
if optimize:
m.optimize('scg', messages=1, max_iters=max_iters)
if plot:

View file

@ -0,0 +1,96 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
import numpy as np
log_2_pi = np.log(2*np.pi)
class DTC(object):
"""
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
The function self.inference returns a Posterior object, which summarizes
the posterior.
NB. It's not recommended to use this function! It's here for historical purposes.
"""
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance)
if beta.size <1:
raise NotImplementedError, "no hetero noise with this implementatino of DTC"
Kmm = kern.K(Z)
Knn = kern.Kdiag(X)
Knm = kern.K(X, Z)
U = Knm
Uy = np.dot(U.T,Y)
#factor Kmm
Kmmi, L, Li, _ = pdinv(Kmm)
# Compute A
LiUT, _ = dtrtrs(L, U.T*np.sqrt(beta), lower=1)
A_I = tdot(LiUT)
A = A_I + np.eye(num_inducing)
# factor A
LA = jitchol(A)
# back substutue to get b, P, v
tmp, _ = dtrtrs(L, Uy, lower=1)
b, _ = dtrtrs(LA, tmp*beta, lower=1)
tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp = tdrtrs(LA, Li, lower=1, trans=0)
P = tdot(tmp.T)
#compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
-np.sum(np.log(np.diag(LA)))*output_dim + \
0.5*num_data*output_dim*np.log(beta) + \
-0.5*beta*np.sum(np.square(Y)) +
0.5*np.sum(np.square(b))
# Compute dL_dKmm
tmp, _ = dtrtrs(L, A_I, lower=1, trans=1)
dL_dK, _ = dtrtrs(L, tmp.T, lower=1, trans=0)
tmp, _ = dtrtrs(LA, tmp.T. lower=1, trans=1)
dL_dK -= tdot(tmp.T)
dL_dK *= output_dim
dL_dK -= tdot(v)
dL_dK /=2.
# Compute dL_dU
vvT_P = tdot(v.reshape(-1,1)) + P
vY = np.dot(v.reshape(-1,1),Y.T)
dL_dU = vY + np.dot(vvT_P, U.T)
dL_dU *= beta
#compute dL_dR
Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(P, U.T), 1) - beta * np.sum(np.square(Y, 1)) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1)
)*beta**2
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict

View file

@ -24,4 +24,5 @@ etc.
"""
from exact_gaussian_inference import ExactGaussianInference
from laplace import LaplaceInference
expectation_propagation = 'foo' # TODO

View file

@ -53,6 +53,6 @@ class ExactGaussianInference(object):
likelihood.update_gradients(np.diag(dL_dK))
return Posterior(LW, alpha, K), log_marginal, {'dL_dK':dL_dK}
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}

View file

@ -1,73 +1,54 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import mdot, jitchol, chol_inv, tdot, dtrtrs
from ...core import SparseGP
class VarDTC(object):
def __init__(self):
self.const_jitter = 1e-6
class FITC(SparseGP):
"""
def inference(self, kern, X, X_variance, Z, likelihood, Y):
Sparse FITC approximation
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
:param X: inputs
:type X: np.ndarray (num_data x Q)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
#make sure we're not using variational uncertain inputs
assert = X_variance is None, "variational inducing inputs only for use with varDTC inference"
#Note: we can;t do the variational thing after making the GITC conditional approximation because K~ appears in the log determinant.
"""
#see whether we've got a different noise variance for each datum
sigma2 = np.squeeze(likelihood.variance)
het_noise = False
if sigma2.size <1:
het_noise = True
def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
def update_likelihood_approximation(self, **kwargs):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
self.likelihood.restart()
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
self._set_params(self._get_params())
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z, self.X)
self.psi2 = None
Kmm = kern.K(Z)
Kdiag = kern.Kdiag(X)
Kmn = kern.K(X, Z)
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #NOTE: beta_star contains Diag0 and the precision
self.V_star = self.beta_star * self.likelihood.Y
Lm = jitchol(Kmm)
V = dtrtrs(Lm, Kmn, lower=1)
# The rather complex computations of self.A
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
#compute effective noise
g_sn2 = sigma2 + Kdiag - np.sum(V*V, 0)
# factor B
self.B = np.eye(self.num_inducing) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
#compute and factor B
tmp = Kmn / np.sqrt(g_snd)
tmp, _ = dtrtrs(Lm, tmp, lower=1)
A = tdot(tmp)
B = np.eye(num_inducing) + A
Bi, Lb, LBi, log_det_B = pdinv(B)
Lmi_psi1V, info = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
#compute posterior parameters
tmp, _ = dtrtrs()
woodbury_vec =
psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, _ = dtrtrs(Lm, self.psi1V, lower=1, trans=0)
LBi_Lmi_psi1V, _ = dtrtrs(LB, Lmi_psi1V, lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
@ -116,10 +97,10 @@ class FITC(SparseGP):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
self._dpsi1_dtheta += self.kern._param_grad_helper(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern._param_grad_helper(_dKmm,self.Z)
self._dKmm_dX += self.kern.gradients_X(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.gradients_X(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.num_params == 0:
@ -163,15 +144,15 @@ class FITC(SparseGP):
def dL_dtheta(self):
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
dL_dtheta += self.kern._param_grad_helper(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern._param_grad_helper(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ = self.kern.gradients_X(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += self.kern.gradients_X(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ

View file

@ -11,255 +11,199 @@
#http://gaussianprocess.org/gpml/code.
import numpy as np
import scipy as sp
from likelihood import likelihood
from ..util.linalg import mdot, jitchol, pddet, dpotrs
from functools import partial as partial_func
from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify
from ...util.misc import param_to_array
from posterior import Posterior
import warnings
from scipy import optimize
class LaplaceInference(object):
"""Laplace approximation to a posterior"""
def __init__(self, data, noise_model, extra_data=None):
def __init__(self):
"""
Laplace Approximation
Find the moments \hat{f} and the hessian at this point
(using Newton-Raphson) of the unnormalised posterior
Compute the GP variables (i.e. generate some Y^{squiggle} and
z^{squiggle} which makes a gaussian the same as the laplace
approximation to the posterior, but normalised
Arguments
---------
:param data: array of data the likelihood function is approximating
:type data: NxD
:param noise_model: likelihood function - subclass of noise_model
:type noise_model: noise_model
:param extra_data: additional data used by some likelihood functions,
"""
self.data = data
self.noise_model = noise_model
self.extra_data = extra_data
#Inital values
self.N, self.D = self.data.shape
self.is_heteroscedastic = True
self.Nparams = 0
self.NORMAL_CONST = ((0.5 * self.N) * np.log(2 * np.pi))
self._mode_finding_tolerance = 1e-7
self._mode_finding_max_iter = 40
self.bad_fhat = True
self._previous_Ki_fhat = None
self.restart()
likelihood.__init__(self)
def restart(self):
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
"""
Reset likelihood variables to their defaults
Returns a Posterior class containing essential quantities of the posterior
"""
#Initial values for the GP variables
self.Y = np.zeros((self.N, 1))
self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N)[:, None]
self.Z = 0
self.YYT = None
self.old_Ki_f = None
self.bad_fhat = False
#make Y a normal array!
Y = param_to_array(Y)
def predictive_values(self,mu,var,full_cov,**noise_args):
if full_cov:
raise NotImplementedError, "Cannot make correlated predictions with an EP likelihood"
return self.noise_model.predictive_values(mu,var,**noise_args)
def log_predictive_density(self, y_test, mu_star, var_star):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
"""
return self.noise_model.log_predictive_density(y_test, mu_star, var_star)
def _get_params(self):
return np.asarray(self.noise_model._get_params())
def _get_param_names(self):
return self.noise_model._get_param_names()
def _set_params(self, p):
return self.noise_model._set_params(p)
def _shared_gradients_components(self):
d3lik_d3fhat = self.noise_model.d3logpdf_df3(self.f_hat, self.data, extra_data=self.extra_data)
dL_dfhat = 0.5*(np.diag(self.Ki_W_i)[:, None]*d3lik_d3fhat).T #why isn't this -0.5?
I_KW_i = np.eye(self.N) - np.dot(self.K, self.Wi_K_i)
return dL_dfhat, I_KW_i
def _Kgradients(self):
"""
Gradients with respect to prior kernel parameters dL_dK to be chained
with dK_dthetaK to give dL_dthetaK
:returns: dL_dK matrix
:rtype: Matrix (1 x num_kernel_params)
"""
dL_dfhat, I_KW_i = self._shared_gradients_components()
dlp = self.noise_model.dlogpdf_df(self.f_hat, self.data, extra_data=self.extra_data)
#Explicit
#expl_a = np.dot(self.Ki_f, self.Ki_f.T)
#expl_b = self.Wi_K_i
#expl = 0.5*expl_a - 0.5*expl_b
#dL_dthetaK_exp = dK_dthetaK(expl, X)
#Implicit
impl = mdot(dlp, dL_dfhat, I_KW_i)
#No longer required as we are computing these in the gp already
#otherwise we would take them away and add them back
#dL_dthetaK_imp = dK_dthetaK(impl, X)
#dL_dthetaK = dL_dthetaK_exp + dL_dthetaK_imp
#dL_dK = expl + impl
#No need to compute explicit as we are computing dZ_dK to account
#for the difference between the K gradients of a normal GP,
#and the K gradients including the implicit part
dL_dK = impl
return dL_dK
def _gradients(self, partial):
"""
Gradients with respect to likelihood parameters (dL_dthetaL)
:param partial: Not needed by this likelihood
:type partial: lambda function
:rtype: array of derivatives (1 x num_likelihood_params)
"""
dL_dfhat, I_KW_i = self._shared_gradients_components()
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = self.noise_model._laplace_gradients(self.f_hat, self.data, extra_data=self.extra_data)
#len(dlik_dthetaL)
num_params = len(self._get_param_names())
# make space for one derivative for each likelihood parameter
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[:, thetaL_i])
#- 0.5*np.trace(mdot(self.Ki_W_i, (self.K, np.diagflat(dlik_hess_dthetaL[thetaL_i]))))
+ np.dot(0.5*np.diag(self.Ki_W_i)[:,None].T, dlik_hess_dthetaL[:, thetaL_i])
)
#Implicit
dfhat_dthetaL = mdot(I_KW_i, self.K, dlik_grad_dthetaL[:, thetaL_i])
dL_dthetaL_imp = np.dot(dL_dfhat, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
return dL_dthetaL
def _compute_GP_variables(self):
"""
Generate data Y which would give the normal distribution identical
to the laplace approximation to the posterior, but normalised
GPy expects a likelihood to be gaussian, so need to caluclate
the data Y^{\tilde} that makes the posterior match that found
by a laplace approximation to a non-gaussian likelihood but with
a gaussian likelihood
Firstly,
The hessian of the unormalised posterior distribution is (K^{-1} + W)^{-1},
i.e. z*N(f|f^{\hat}, (K^{-1} + W)^{-1}) but this assumes a non-gaussian likelihood,
we wish to find the hessian \Sigma^{\tilde}
that has the same curvature but using our new simulated data Y^{\tilde}
i.e. we do N(Y^{\tilde}|f^{\hat}, \Sigma^{\tilde})N(f|0, K) = z*N(f|f^{\hat}, (K^{-1} + W)^{-1})
and we wish to find what Y^{\tilde} and \Sigma^{\tilde}
We find that Y^{\tilde} = W^{-1}(K^{-1} + W)f^{\hat} and \Sigma^{tilde} = W^{-1}
Secondly,
GPy optimizes the log marginal log p(y) = -0.5*ln|K+\Sigma^{\tilde}| - 0.5*Y^{\tilde}^{T}(K^{-1} + \Sigma^{tilde})^{-1}Y + lik.Z
So we can suck up any differences between that and our log marginal likelihood approximation
p^{\squiggle}(y) = -0.5*f^{\hat}K^{-1}f^{\hat} + log p(y|f^{\hat}) - 0.5*log |K||K^{-1} + W|
which we want to optimize instead, by equating them and rearranging, the difference is added onto
the log p(y) that GPy optimizes by default
Thirdly,
Since we have gradients that depend on how we move f^{\hat}, we have implicit components
aswell as the explicit dL_dK, we hold these differences in dZ_dK and add them to dL_dK in the
gp.py code
"""
Wi = 1.0/self.W
self.Sigma_tilde = np.diagflat(Wi)
Y_tilde = Wi*self.Ki_f + self.f_hat
self.Wi_K_i = self.W12BiW12
ln_det_Wi_K = pddet(self.Sigma_tilde + self.K)
lik = self.noise_model.logpdf(self.f_hat, self.data, extra_data=self.extra_data)
y_Wi_K_i_y = mdot(Y_tilde.T, self.Wi_K_i, Y_tilde)
Z_tilde = (+ lik
- 0.5*self.ln_B_det
+ 0.5*ln_det_Wi_K
- 0.5*self.f_Ki_f
+ 0.5*y_Wi_K_i_y
+ self.NORMAL_CONST
)
#Convert to float as its (1, 1) and Z must be a scalar
self.Z = np.float64(Z_tilde)
self.Y = Y_tilde
self.YYT = np.dot(self.Y, self.Y.T)
self.covariance_matrix = self.Sigma_tilde
self.precision = 1.0 / np.diag(self.covariance_matrix)[:, None]
#Compute dZ_dK which is how the approximated distributions gradients differ from the dL_dK computed for other likelihoods
self.dZ_dK = self._Kgradients()
#+ 0.5*self.Wi_K_i - 0.5*np.dot(self.Ki_f, self.Ki_f.T) #since we are not adding the K gradients explicit part theres no need to compute this again
def fit_full(self, K):
"""
The laplace approximation algorithm, find K and expand hessian
For nomenclature see Rasmussen & Williams 2006 - modified for numerical stability
:param K: Prior covariance matrix evaluated at locations X
:type K: NxN matrix
"""
self.K = K.copy()
# Compute K
K = kern.K(X)
#Find mode
self.f_hat = self.rasm_mode(self.K)
if self.bad_fhat:
Ki_f_init = np.zeros_like(Y)
else:
Ki_f_init = self._previous_Ki_fhat
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
#Compute hessian and other variables at mode
self._compute_likelihood_variables()
log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
#Compute fake variables replicating laplace approximation to posterior
self._compute_GP_variables()
kern.update_gradients_full(dL_dK, X)
likelihood.update_gradients(dL_dthetaL)
def _compute_likelihood_variables(self):
self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=woodbury_vector, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK}
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
"""
Compute the variables required to compute gaussian Y variables
Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006
Influenced by GPML (BSD) code, all errors are our own
:param K: Covariance matrix evaluated at locations X
:type K: NxD matrix
:param Y: The data
:type Y: np.ndarray
:param likelihood: the likelihood of the latent function value for the given data
:type likelihood: a GPy.likelihood object
:param Ki_f_init: the initial guess at the mode
:type Ki_f_init: np.ndarray
:param Y_metadata: information about the data, e.g. which likelihood to take from a multi-likelihood object
:type Y_metadata: np.ndarray | None
:returns: f_hat, mode on which to make laplace approxmiation
:rtype: np.ndarray
"""
Ki_f = Ki_f_init.copy()
f = np.dot(K, Ki_f)
#define the objective function (to be maximised)
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata)
difference = np.inf
iteration = 0
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata)
W_f = W*f
grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata)
b = W_f + grad # R+W p46 line 6.
#W12BiW12Kb, B_logdet = self._compute_B_statistics(K, W.copy(), np.dot(K, b), likelihood.log_concave)
W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
full_step_Ki_f = b - W12BiW12Kb # full_step_Ki_f = a in R&W p46 line 6.
dKi_f = full_step_Ki_f - Ki_f
#define an objective for the line search (minimize this one)
def inner_obj(step_size):
Ki_f_trial = Ki_f + step_size*dKi_f
f_trial = np.dot(K, Ki_f_trial)
return -obj(Ki_f_trial, f_trial)
#use scipy for the line search, the compute new values of f, Ki_f
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
Ki_f_new = Ki_f + step*dKi_f
f_new = np.dot(K, Ki_f_new)
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
Ki_f = Ki_f_new
f = f_new
iteration += 1
#Warn of bad fits
if difference > self._mode_finding_tolerance:
if not self.bad_fhat:
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
self.bad_fhat = True
elif self.bad_fhat:
self.bad_fhat = False
warnings.warn("f_hat now fine again")
return f, Ki_f
def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
"""
At the mode, compute the hessian and effective covariance matrix.
returns: logZ : approximation to the marginal likelihood
woodbury_vector : variable required for calculating the approximation to the covariance matrix
woodbury_inv : variable required for calculating the approximation to the covariance matrix
dL_dthetaL : array of derivatives (1 x num_kernel_params)
dL_dthetaL : array of derivatives (1 x num_likelihood_params)
"""
#At this point get the hessian matrix (or vector as W is diagonal)
self.W = -self.noise_model.d2logpdf_df2(self.f_hat, self.data, extra_data=self.extra_data)
W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata)
if not self.noise_model.log_concave:
#print "Under 1e-10: {}".format(np.sum(self.W < 1e-6))
self.W[self.W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
self.W12BiW12, self.ln_B_det = self._compute_B_statistics(self.K, self.W, np.eye(self.N))
#compute vital matrices
C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C)
self.Ki_f = self.Ki_f
self.f_Ki_f = np.dot(self.f_hat.T, self.Ki_f)
self.Ki_W_i = self.K - mdot(self.K, self.W12BiW12, self.K)
#compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L)))
def _compute_B_statistics(self, K, W, a):
#Compute vival matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat
woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
#BiK, _ = dpotrs(L, K, lower=1)
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
####################
#compute dL_dK#
####################
if kern.size > 0 and not kern.is_fixed:
#Explicit
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
#Implicit
implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i)
dL_dK = explicit_part + implicit_part
else:
dL_dK = np.zeros(likelihood.size)
####################
#compute dL_dthetaL#
####################
if likelihood.size > 0 and not likelihood.is_fixed:
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata)
num_params = likelihood.size
# make space for one derivative for each likelihood parameter
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
# The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
+ 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
)
#Implicit
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
else:
dL_dthetaL = np.zeros(likelihood.size)
return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave):
"""
Rasmussen suggests the use of a numerically stable positive definite matrix B
Which has a positive diagonal element and can be easyily inverted
@ -268,136 +212,28 @@ class LaplaceInference(object):
:type K: NxN matrix
:param W: Negative hessian at a point (diagonal matrix)
:type W: Vector of diagonal values of hessian (1xN)
:param a: Matrix to calculate W12BiW12a
:type a: Matrix NxN
:returns: (W12BiW12, ln_B_det)
:returns: (W12BiW12, L_B, Li_W12)
"""
if not self.noise_model.log_concave:
if not log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
W[W < 1e-6] = 1e-6 # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
# To cause the posterior to become less certain than the prior and likelihood,
# This is a property only held by non-log-concave likelihoods
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
W_12 = np.sqrt(W)
B = np.eye(self.N) + W_12*K*W_12.T
B = np.eye(K.shape[0]) + W_12*K*W_12.T
L = jitchol(B)
W12BiW12a = W_12*dpotrs(L, np.asfortranarray(W_12*a), lower=1)[0]
ln_B_det = 2*np.sum(np.log(np.diag(L)))
return W12BiW12a, ln_B_det
LiW12, _ = dtrtrs(L, np.diagflat(W_12), lower=1, trans=0)
K_Wi_i = np.dot(LiW12.T, LiW12) # R = W12BiW12, in R&W p 126, eq 5.25
def rasm_mode(self, K, MAX_ITER=40):
"""
Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006
Influenced by GPML (BSD) code, all errors are our own
#here's a better way to compute the required matrix.
# you could do the model finding witha backsub, instead of a dot...
#L2 = L/W_12
#K_Wi_i_2 , _= dpotri(L2)
#symmetrify(K_Wi_i_2)
:param K: Covariance matrix evaluated at locations X
:type K: NxD matrix
:param MAX_ITER: Maximum number of iterations of newton-raphson before forcing finish of optimisation
:type MAX_ITER: scalar
:returns: f_hat, mode on which to make laplace approxmiation
:rtype: NxD matrix
"""
#old_Ki_f = np.zeros((self.N, 1))
return K_Wi_i, L, LiW12
#Start f's at zero originally of if we have gone off track, try restarting
if self.old_Ki_f is None or self.bad_fhat:
old_Ki_f = np.random.rand(self.N, 1)/50.0
#old_Ki_f = self.Y
f = np.dot(K, old_Ki_f)
else:
#Start at the old best point
old_Ki_f = self.old_Ki_f.copy()
f = self.f_hat.copy()
new_obj = -np.inf
old_obj = np.inf
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.T, f) + self.noise_model.logpdf(f, self.data, extra_data=self.extra_data)
difference = np.inf
epsilon = 1e-7
#step_size = 1
#rs = 0
i = 0
while difference > epsilon and i < MAX_ITER:
W = -self.noise_model.d2logpdf_df2(f, self.data, extra_data=self.extra_data)
W_f = W*f
grad = self.noise_model.dlogpdf_df(f, self.data, extra_data=self.extra_data)
b = W_f + grad
W12BiW12Kb, _ = self._compute_B_statistics(K, W.copy(), np.dot(K, b))
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
full_step_Ki_f = b - W12BiW12Kb
dKi_f = full_step_Ki_f - old_Ki_f
f_old = f.copy()
def inner_obj(step_size, old_Ki_f, dKi_f, K):
Ki_f = old_Ki_f + step_size*dKi_f
f = np.dot(K, Ki_f)
# This is nasty, need to set something within an optimization though
self.tmp_Ki_f = Ki_f.copy()
self.tmp_f = f.copy()
return -obj(Ki_f, f)
i_o = partial_func(inner_obj, old_Ki_f=old_Ki_f, dKi_f=dKi_f, K=K)
#Find the stepsize that minimizes the objective function using a brent line search
#The tolerance and maxiter matter for speed! Seems to be best to keep them low and make more full
#steps than get this exact then make a step, if B was bigger it might be the other way around though
#new_obj = sp.optimize.minimize_scalar(i_o, method='brent', tol=1e-4, options={'maxiter':5}).fun
new_obj = sp.optimize.brent(i_o, tol=1e-4, maxiter=10)
f = self.tmp_f.copy()
Ki_f = self.tmp_Ki_f.copy()
#Optimize without linesearch
#f_old = f.copy()
#update_passed = False
#while not update_passed:
#Ki_f = old_Ki_f + step_size*dKi_f
#f = np.dot(K, Ki_f)
#old_obj = new_obj
#new_obj = obj(Ki_f, f)
#difference = new_obj - old_obj
##print "difference: ",difference
#if difference < 0:
##print "Objective function rose", np.float(difference)
##If the objective function isn't rising, restart optimization
#step_size *= 0.8
##print "Reducing step-size to {ss:.3} and restarting optimization".format(ss=step_size)
##objective function isn't increasing, try reducing step size
#f = f_old.copy() #it's actually faster not to go back to old location and just zigzag across the mode
#old_obj = new_obj
#rs += 1
#else:
#update_passed = True
#old_Ki_f = self.Ki_f.copy()
#difference = abs(new_obj - old_obj)
#old_obj = new_obj.copy()
difference = np.abs(np.sum(f - f_old)) + np.abs(np.sum(Ki_f - old_Ki_f))
#difference = np.abs(np.sum(Ki_f - old_Ki_f))/np.float(self.N)
old_Ki_f = Ki_f.copy()
i += 1
self.old_Ki_f = old_Ki_f.copy()
#Warn of bad fits
if difference > epsilon:
self.bad_fhat = True
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
elif self.bad_fhat:
self.bad_fhat = False
warnings.warn("f_hat now perfect again")
self.Ki_f = Ki_f
return f

View file

@ -2,19 +2,20 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs
from ...util.linalg import pdinv, dpotrs, tdot, dtrtrs, dpotri, symmetrify
class Posterior(object):
"""
An object to represent a Gaussian posterior over latent function values.
An object to represent a Gaussian posterior over latent function values, p(f|D).
This may be computed exactly for Gaussian likelihoods, or approximated for
non-Gaussian likelihoods.
The purpose of this class is to serve as an interface between the inference
schemes and the model classes.
schemes and the model classes. the model class can make predictions for
the function at any new point x_* by integrating over this posterior.
"""
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None):
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None):
"""
woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
@ -45,7 +46,10 @@ class Posterior(object):
#obligatory
self._K = K
if ((woodbury_chol is not None) and (woodbury_vector is not None) and (K is not None)) or ((mean is not None) and (cov is not None) and (K is not None)):
if ((woodbury_chol is not None) and (woodbury_vector is not None))\
or ((woodbury_inv is not None) and (woodbury_vector is not None))\
or ((woodbury_inv is not None) and (mean is not None))\
or ((mean is not None) and (cov is not None)):
pass # we have sufficient to compute the posterior
else:
raise ValueError, "insufficient information to compute the posterior"
@ -56,6 +60,10 @@ class Posterior(object):
self._woodbury_chol = woodbury_chol
self._woodbury_vector = woodbury_vector
#option 2.
self._woodbury_inv = woodbury_inv
#and woodbury vector
#option 2:
self._mean = mean
self._covariance = cov
@ -66,7 +74,7 @@ class Posterior(object):
@property
def mean(self):
if self._mean is None:
self._mean = np.dot(self._K, self._woodbury_vector)
self._mean = np.dot(self._K, self.woodbury_vector)
return self._mean
@property
@ -85,16 +93,27 @@ class Posterior(object):
@property
def woodbury_chol(self):
if self._woodbury_chol is None:
B = self._K - self._covariance
tmp, _ = dpotrs(self._K_chol, B)
Wi, _ = dpotrs(self._K_chol, tmp.T)
_, _, self._woodbury_chol, _ = pdinv(Wi)
#try computing woodbury chol from cov
if self._woodbury_inv is not None:
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
elif self._covariance is not None:
B = self._K - self._covariance
tmp, _ = dpotrs(self.K_chol, B)
self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
return self._woodbury_chol
@property
def woodbury_inv(self):
if self._woodbury_inv is None:
self._woodbury_inv, _ = dpotri(self.woodbury_chol)
symmetrify(self._woodbury_inv)
return self._woodbury_inv
@property
def woodbury_vector(self):
if self._woodbury_vector is None:
self._woodbury_vector, _ = dpotrs(self._K_chol, self.mean)
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
return self._woodbury_vector
@property

View file

@ -2,8 +2,10 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior
from ...util.linalg import pdinv, dpotrs, tdot
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
import numpy as np
from ...util.caching import Cacher
from ...util.misc import param_to_array
log_2_pi = np.log(2*np.pi)
class VarDTC(object):
@ -17,10 +19,15 @@ class VarDTC(object):
"""
def __init__(self):
self._YYTfactor_cache = caching.cache()
#self._YYTfactor_cache = caching.cache()
self.const_jitter = 1e-6
self.get_trYYT = Cacher(self._get_trYYT, 1)
self.get_YYTfactor = Cacher(self._get_YYTfactor, 1)
def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y)))
def get_YYTfactor(self, Y):
def _get_YYTfactor(self, Y):
"""
find a matrix L which satisfies LLT = YYT.
@ -28,11 +35,10 @@ class VarDTC(object):
"""
N, D = Y.shape
if (N>D):
return Y
return param_to_array(Y)
else:
#if Y in self.cache, return self.Cache[Y], else stor Y in cache and return L.
raise NotImplementedError, 'TODO' #TODO
return jitchol(tdot(Y))
def get_VVTfactor(self, Y, prec):
return Y * prec # TODO chache this, and make it effective
@ -42,7 +48,13 @@ class VarDTC(object):
num_data, output_dim = Y.shape
#see whether we're using variational uncertain inputs
uncertain_inputs = (X_variance is None)
uncertain_inputs = not (X_variance is None)
#see whether we've got a different noise variance for each datum
beta = 1./np.squeeze(likelihood.variance)
het_noise = False
if beta.size <1:
het_noise = True
# kernel computations, using BGPLVM notation
Kmm = kern.K(Z)
@ -59,30 +71,38 @@ class VarDTC(object):
# The rather complex computations of A
if uncertain_inputs:
if likelihood.is_heteroscedastic:
psi2_beta = (psi2 * (likelihood.precision.flatten().reshape(num_data, 1, 1))).sum(0)
if het_noise:
psi2_beta = (psi2 * (beta.flatten().reshape(num_data, 1, 1))).sum(0)
else:
psi2_beta = psi2.sum(0) * likelihood.precision
evals, evecs = linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
if not np.array_equal(evals, clipped_evals):
pass # print evals
tmp = evecs * np.sqrt(clipped_evals)
tmp = tmp.T
psi2_beta = psi2.sum(0) * beta
if 0:
evals, evecs = linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
if not np.array_equal(evals, clipped_evals):
pass # print evals
tmp = evecs * np.sqrt(clipped_evals)
tmp = tmp.T
# no backsubstitution because of bound explosion on tr(A) if not...
LmInv, _ = dtrtri(Lm, lower=1)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
#print A.sum()
else:
if likelihood.is_heteroscedastic:
tmp = psi1 * (np.sqrt(likelihood.precision.flatten().reshape(num_data, 1)))
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
tmp = psi1 * (np.sqrt(likelihood.precision))
tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
A = tdot(tmp)
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, np.asfortranarray(tmp.T), lower=1)
A = tdot(tmp)
# factor B
B = np.eye(num_inducing) + A
self.A = A
LB = jitchol(B)
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
VVT_factor = self.get_VVTfactor(Y, likelihood.precision)
self.YYTfactor = self.get_YYTfactor(Y)
VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
trYYT = self.get_trYYT(Y)
psi1Vf = np.dot(psi1.T, VVT_factor)
# back substutue C into psi1Vf
@ -91,17 +111,6 @@ class VarDTC(object):
tmp, info2 = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, info3 = dtrtrs(Lm, tmp, lower=1, trans=1)
#compute log marginal likelihood
if likelihood.is_heteroscedastic:
A = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(likelihood.precision)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
B = -0.5 * output_dim * (np.sum(likelihood.precision.flatten() * psi0) - np.trace(_A))
else:
A = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(likelihood.precision)) - 0.5 * likelihood.precision * likelihood.trYYT
B = -0.5 * output_dim * (np.sum(likelihood.precision * psi0) - np.trace(_A))
C = -output_dim * (np.sum(np.log(np.diag(LB)))) # + 0.5 * num_inducing * np.log(sf2))
D = 0.5 * data_fit
log_marginal = A + B + C + D
# Compute dL_dKmm
tmp = tdot(_LBi_Lmi_psi1Vf)
@ -112,21 +121,20 @@ class VarDTC(object):
tmp += output_dim * np.eye(num_inducing)
dL_dKmm = backsub_both_sides(Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
dL_dpsi0 = -0.5 * output_dim * (likelihood.precision * np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(likelihood.VVT_factor, Cpsi1Vf.T)
# Compute dL_dpsi
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
if likelihood.is_heteroscedastic:
if has_uncertain_inputs:
dL_dpsi2 = likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
if het_noise:
if uncertain_inputs:
dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * likelihood.precision.reshape(num_data, 1)).T).T
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T
dL_dpsi2 = None
else:
dL_dpsi2 = likelihood.precision * dL_dpsi2_beta
if has_uncertain_inputs:
dL_dpsi2 = beta * dL_dpsi2_beta
if uncertain_inputs:
# repeat for each of the N psi_2 matrices
dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
else:
@ -139,41 +147,66 @@ class VarDTC(object):
if likelihood.size == 0:
# save computation here.
partial_for_likelihood = None
elif likelihood.is_heteroscedastic:
if has_uncertain_inputs:
elif het_noise:
if uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
else:
LBi = chol_inv(LB)
Lmi_psi1, nil = dtrtrs(Lm, np.asfortranarray(psi1.T), lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2
partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
partial_for_likelihood = -0.5 * likelihood.precision + 0.5 * likelihood.V**2
partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * likelihood.precision**2
partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*likelihood.precision**2
partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * likelihood.precision**2
partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * likelihood.precision**2
partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
else:
# likelihood is not heteroscedatic
partial_for_likelihood = -0.5 * num_data * output_dim * likelihood.precision + 0.5 * likelihood.trYYT * likelihood.precision ** 2
partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * likelihood.precision ** 2 - np.trace(_A) * likelihood.precision)
partial_for_likelihood += likelihood.precision * (0.5 * np.sum(_A * DBi_plus_BiPBi) - data_fit)
partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
#compute log marginal likelihood
if het_noise:
lik_1 = -0.5 * num_data * output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y)
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
else:
lik_1 = -0.5 * num_data * output_dim * (np.log(2.*np.pi) - np.log(beta)) - 0.5 * beta * trYYT
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
lik_4 = 0.5 * data_fit
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
#put the gradients in the right places
likelihood.update_gradients(partial_for_likelihood)
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dpsi0':dL_dpsi0, 'dL_dpsi1':dL_dpsi1, 'dL_dpsi2':dL_dpsi2}
kern.update_gradients_variational(mu=X, S=X_variance, Z=Z, **grad_dict)
else:
grad_dict = {'dL_dKmm': dL_dKmm, 'dL_dKdiag':dL_dpsi0, 'dL_dKnm':dL_dpsi1}
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
if VVT_factor.shape[1] == Y.shape[1]:
Cpsi1V = Cpsi1Vf
woodbury_vector = Cpsi1Vf # == Cpsi1V
else:
raise NotImplementedError #TODO
psi1V = np.dot(Y.T*beta, psi1).T
tmp, _ = dtrtrs(Lm, np.asfortranarray(psi1V), lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
Bi, _ = dpotri(LB, lower=0)
symmetrify(Bi)
woodbury_inv = backsub_both_sides(Lm, np.eye(num_inducing) - Bi)
#construct a posterior object
post = Posterior(woodbury_chol=None, woodbury_vector=Cpsi1V, K=None, mean=None, cov=None, K_chol=None)
return
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict

View file

@ -69,8 +69,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-60 # Lower bound on scale.
betamax = 1.0e50 # Upper bound on scale.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e15 # Upper bound on scale.
status = "Not converged"
flog = [fold]

View file

@ -8,6 +8,7 @@ from parts.prod import Prod as prod
from parts.linear import Linear
from parts.kernpart import Kernpart
from ..core.parameterization import Parameterized
from GPy.core.parameterization.param import Param
class kern(Parameterized):
def __init__(self, input_dim, parts=[], input_slices=None):
@ -84,7 +85,7 @@ class kern(Parameterized):
# represents the gradient in the transformed space (i.e. that given by
# get_params_transformed())
#
# :param g: the gradient vector for the current model, usually created by dK_dtheta
# :param g: the gradient vector for the current model, usually created by _param_grad_helper
# """
# x = self._get_params()
# [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
@ -152,14 +153,16 @@ class kern(Parameterized):
# newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
# newkern.fixed_values = self.fixed_values + other.fixed_values
# newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
[newkern._add_constrain(param, transform, warning=False)
for param, transform in itertools.izip(
*itertools.chain(self.constraints.iteritems(),
other.constraints.iteritems()))]
[newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()]
[newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()]
newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None
return newkern
def __call__(self, X, X2=None):
return self.K(X, X2)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
@ -180,8 +183,10 @@ class kern(Parameterized):
:type tensor: bool
"""
K1 = self.copy()
K2 = other.copy()
K1 = self
K2 = other
#K1 = self.copy()
#K2 = other.copy()
slices = []
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
@ -284,11 +289,12 @@ class kern(Parameterized):
[p.update_gradients_full(dL_dK, X) for p in self._parameters_]
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
raise NotImplementedError
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
raise NotImplementedError
[p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]
def dK_dtheta(self, dL_dK, X, X2=None):
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
[p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]
def _param_grad_helper(self, dL_dK, X, X2=None):
"""
Compute the gradient of the covariance function with respect to the parameters.
@ -304,9 +310,9 @@ class kern(Parameterized):
assert X.shape[1] == self.input_dim
target = np.zeros(self.size)
if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
[p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
else:
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
[p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
return self._transform_gradients(target)
@ -478,6 +484,7 @@ from GPy.core.model import Model
class Kern_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgradient() to be called independently on a kernel."""
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Model.__init__(self, 'kernel_test_model')
num_samples = 20
num_samples2 = 10
if kernel==None:
@ -489,14 +496,12 @@ class Kern_check_model(Model):
dL_dK = np.ones((X.shape[0], X.shape[0]))
else:
dL_dK = np.ones((X.shape[0], X2.shape[0]))
self.kernel=kernel
self.add_parameter(kernel)
self.X = X
self.X2 = X2
self.dL_dK = dL_dK
#self.constrained_indices=[]
#self.constraints=[]
Model.__init__(self, 'kernel_test_model')
def is_positive_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
@ -505,15 +510,6 @@ class Kern_check_model(Model):
else:
return True
def _get_params(self):
return self.kernel._get_params()
def _get_param_names(self):
return self.kernel._get_param_names()
def _set_params(self, x):
self.kernel._set_params(x)
def log_likelihood(self):
return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
@ -526,7 +522,7 @@ class Kern_check_dK_dtheta(Kern_check_model):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
def _log_likelihood_gradients(self):
return self.kernel.dK_dtheta(self.dL_dK, self.X, self.X2)
return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
class Kern_check_dKdiag_dtheta(Kern_check_model):
"""This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
@ -534,6 +530,8 @@ class Kern_check_dKdiag_dtheta(Kern_check_model):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def parameters_changed(self):
self.kernel.update_gradients_full(self.dL_dK, self.X)
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
@ -545,41 +543,25 @@ class Kern_check_dK_dX(Kern_check_model):
"""This class allows gradient checks for the gradient of a kernel with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
self.remove_parameter(kernel)
self.X = Param('X', self.X)
self.add_parameter(self.X)
def _log_likelihood_gradients(self):
return self.kernel.dK_dX(self.dL_dK, self.X, self.X2).flatten()
return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
class Kern_check_dKdiag_dX(Kern_check_model):
class Kern_check_dKdiag_dX(Kern_check_dK_dX):
"""This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
if dL_dK==None:
self.dL_dK = np.ones((self.X.shape[0]))
def log_likelihood(self):
return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
def _log_likelihood_gradients(self):
return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs on kernels to check the correctness of their
@ -654,7 +636,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
@ -670,7 +652,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
@ -686,7 +668,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
except NotImplementedError:
result=True
if verbose:
print("dK_dX not implemented for " + kern.name)
print("gradients_X not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:

View file

@ -43,7 +43,7 @@ class Brownian(Kernpart):
def Kdiag(self,X,target):
target += self.variance*X.flatten()
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
if X2 is None:
X2 = X
target += np.sum(np.fmin(X,X2.T)*dL_dK)
@ -51,7 +51,7 @@ class Brownian(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.dot(X.flatten(), dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
raise NotImplementedError, "TODO"
#target += self.variance
#target -= self.variance*theta(X-X2.T)

View file

@ -74,7 +74,7 @@ class Matern32(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
@ -96,7 +96,7 @@ class Matern32(Kernpart):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X[None, :, :]) / self.lengthscale), -1))[:, :, None]
@ -105,8 +105,8 @@ class Matern32(Kernpart):
else:
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass

View file

@ -74,7 +74,7 @@ class Matern52(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target)
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
@ -96,7 +96,7 @@ class Matern52(Kernpart):
"""derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X[None,:,:])/self.lengthscale),-1))[:,:,None]
@ -104,8 +104,8 @@ class Matern52(Kernpart):
else:
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
gradients_X = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(gradients_X*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,dL_dKdiag,X,target):
pass

View file

@ -90,7 +90,7 @@ class ODE_1(Kernpart):
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.abs(X - X2.T)

View file

@ -124,7 +124,7 @@ class Eq_ode1(Kernpart):
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
pass
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
# First extract times and indices.
self._extract_t_indices(X, X2, dL_dK=dL_dK)
@ -193,7 +193,7 @@ class Eq_ode1(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,index,target):
pass
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
pass
def _extract_t_indices(self, X, X2=None, dL_dK=None):

View file

@ -75,7 +75,7 @@ class Exponential(Kernpart):
"""Compute the diagonal of the covariance matrix associated to X."""
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))
@ -95,13 +95,13 @@ class Exponential(Kernpart):
# NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(dL_dKdiag)
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass

View file

@ -50,7 +50,7 @@ class FiniteDimensional(Kernpart):
def Kdiag(self,X,target):
product = np.diag(self.K(X, X))
np.add(target,product,target)
def dK_dtheta(self,X,X2,target):
def _param_grad_helper(self,X,X2,target):
"""Return shape is NxMx(Ntheta)"""
if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F])

View file

@ -31,10 +31,10 @@ class Fixed(Kernpart):
def K(self, X, X2, target):
target += self.variance * self.fixed_K
def dK_dtheta(self, partial, X, X2, target):
def _param_grad_helper(self, partial, X, X2, target):
target += (partial * self.fixed_K).sum()
def dK_dX(self, partial, X, X2, target):
def gradients_X(self, partial, X, X2, target):
pass
def dKdiag_dX(self, partial, X, target):

View file

@ -85,7 +85,7 @@ class Gibbs(Kernpart):
"""Compute the diagonal of the covariance matrix for X."""
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
self._dK_computations(dL_dK)
@ -97,7 +97,7 @@ class Gibbs(Kernpart):
target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2)
@ -105,8 +105,8 @@ class Gibbs(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
dK_dX = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(dK_dX*dL_dK.T[:, :, None], 0)
gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK)
if X2 is None:

View file

@ -80,7 +80,7 @@ class Hetero(Kernpart):
"""Helper function for computing the diagonal elements of the covariance."""
return self.mapping.f(X).flatten()**2
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
if (X2 is None) or (X2 is X):
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
@ -90,7 +90,7 @@ class Hetero(Kernpart):
"""Gradient of diagonal of covariance with respect to parameters."""
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
if X2==None or X2 is X:
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]

View file

@ -50,19 +50,19 @@ class Hierarchical(Kernpart):
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
#[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
X, X2, slices, slices2 = self._sort_slices(X,X2)
[[[[k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
[[[[k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target[p_start:p_stop]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices_, slices2_)] for k, p_start, p_stop, slices_, slices2_ in zip(self.parts, self.param_starts, self.param_stops, slices, slices2)]
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
raise NotImplementedError
#X,slices = X[:,:-1],index_to_slices(X[:,-1])
#if X2 is None:
#X2,slices2 = X,slices
#else:
#X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
#[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
#
def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError

View file

@ -70,22 +70,22 @@ class IndependentOutputs(Kernpart):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
[[[self.k._param_grad_helper(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
[[[self.k.gradients_X(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dKdiag_dX(self,dL_dKdiag,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])

View file

@ -20,7 +20,6 @@ class Kernpart(Parameterized):
# the number of optimisable parameters
# the name of the covariance function.
# link to parameterized objects
self._parameters_ = []
#self._X = None
def connect_input(self, X):
@ -76,14 +75,14 @@ class Kernpart(Parameterized):
raise NotImplementedError
def Kdiag(self,X,target):
raise NotImplementedError
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
# In the base case compute this by calling dK_dtheta. Need to
# In the base case compute this by calling _param_grad_helper. Need to
# override for stationary covariances (for example) to save
# time.
for i in range(X.shape[0]):
self.dK_dtheta(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
def psi0(self,Z,mu,S,target):
raise NotImplementedError
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
@ -106,12 +105,19 @@ class Kernpart(Parameterized):
raise NotImplementedError
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
raise NotImplementedError
def dKdiag_dX(self, dL_dK, X, target):
raise NotImplementedError
def update_gradients_full(self, dL_dK, X):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
"""Set the gradients of all parameters when doing sparse (M) inference."""
raise NotImplementedError
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
raise NotImplementedError
class Kernpart_stationary(Kernpart):
def __init__(self, input_dim, lengthscale=None, ARD=False):

View file

@ -8,6 +8,7 @@ from kernpart import Kernpart
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Linear(Kernpart):
"""
@ -43,8 +44,9 @@ class Linear(Kernpart):
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances)
self.add_parameters(self.variances)
self.variances = Param('variances', variances, Logexp())
self.variances.gradient = np.zeros(self.variances.shape)
self.add_parameter(self.variances)
self.variances.add_observer(self, self.update_variance)
# initialize cache
@ -57,21 +59,35 @@ class Linear(Kernpart):
def on_input_change(self, X):
self._K_computations(X, None)
# def _get_params(self):
# return self.variances
#
# def _set_params(self, x):
# assert x.size == (self.num_params)
# self.variances = x
#def parameters_changed(self):
# self.variances2 = np.square(self.variances)
#
# def _get_param_names(self):
# if self.num_params == 1:
# return ['variance']
# else:
# return ['variance_%i' % i for i in range(self.variances.size)]
def update_gradients_full(self, dL_dK, X):
#self.variances.gradient[:] = 0
self._param_grad_helper(dL_dK, X, self.variances.gradient)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = tmp.sum()
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
# psi0:
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD: self.variances.gradient[:] = tmp.sum(0)
else: self.variances.gradient[:] = tmp.sum()
#psi1
self._param_grad_helper(dL_dpsi1, mu, Z, self.variances.gradient)
#psi2
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: self.variances.gradient += tmp.sum()
#from Kmm
self._K_computations(Z, None)
self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
def K(self, X, X2, target):
if self.ARD:
XX = X * np.sqrt(self.variances)
@ -88,7 +104,7 @@ class Linear(Kernpart):
def Kdiag(self, X, target):
np.add(target, np.sum(self.variances * np.square(X), -1), target)
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
if self.ARD:
if X2 is None:
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)]
@ -100,14 +116,7 @@ class Linear(Kernpart):
self._K_computations(X, X2)
target += np.sum(self._dot_product * dL_dK)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
if X2 is None:
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
else:
@ -124,14 +133,6 @@ class Linear(Kernpart):
self._psi_computations(Z, mu, S)
target += np.sum(self.variances * self.mu2_S, 1)
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD:
target += tmp.sum(0)
else:
target += tmp.sum()
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
target_S += dL_dpsi0[:, None] * self.variances
@ -140,17 +141,13 @@ class Linear(Kernpart):
"""the variance, it does nothing"""
self._psi1 = self.K(mu, Z, target)
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
"""the variance, it does nothing"""
self.dK_dtheta(dL_dpsi1, mu, Z, target)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
"""Do nothing for S, it does not affect psi1"""
self._psi_computations(Z, mu, S)
target_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self.dK_dX(dL_dpsi1.T, Z, mu, target)
self.gradients_X(dL_dpsi1.T, Z, mu, target)
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
@ -164,25 +161,17 @@ class Linear(Kernpart):
def dpsi2_dtheta_new(self, dL_dpsi2, Z, mu, S, target):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self.dK_dtheta(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
self._param_grad_helper(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target)
result= 2.*(dL_dpsi2[:,:,:,None]*S[:,None,None,:]*self.variances*Z[None,:,None,:]*Z[None,None,:,:]).sum(0).sum(0).sum(0)
if self.ARD:
target += result.sum(0).sum(0).sum(0)
else:
target += result.sum()
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
if self.ARD:
target += tmp.sum(0).sum(0).sum(0)
else:
target += tmp.sum()
def dpsi2_dmuS_new(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
tmp = np.zeros((mu.shape[0], Z.shape[0]))
self.K(mu,Z,tmp)
self.dK_dX(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
self.gradients_X(2.*np.sum(dL_dpsi2*tmp[:,None,:],2),mu,Z,target_mu)
Zs = Z*self.variances
Zs_sq = Zs[:,None,:]*Zs[None,:,:]
@ -288,11 +277,11 @@ class Linear(Kernpart):
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
self._X = X.copy()
if X2 is None:
self._dot_product = tdot(X)
self._dot_product = tdot(param_to_array(X))
self._X2 = None
else:
self._X2 = X2.copy()
self._dot_product = np.dot(X, X2.T)
self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2

View file

@ -77,7 +77,7 @@ class MLP(Kernpart):
self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
denom3 = self._K_denom*self._K_denom*self._K_denom
@ -107,7 +107,7 @@ class MLP(Kernpart):
target[0] += np.sum(self._K_dvar*dL_dK)
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_asin_arg

View file

@ -112,7 +112,7 @@ class PeriodicMatern32(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

View file

@ -114,7 +114,7 @@ class PeriodicMatern52(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

View file

@ -6,6 +6,7 @@ from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot
from GPy.util.decorators import silence_errors
from GPy.core.parameterization.param import Param
class PeriodicExponential(Kernpart):
"""
@ -25,9 +26,9 @@ class PeriodicExponential(Kernpart):
"""
def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi):
def __init__(self, input_dim=1, variance=1., lengthscale=None, period=2 * np.pi, n_freq=10, lower=0., upper=4 * np.pi, name='periodic_exp'):
super(PeriodicExponential, self).__init__(input_dim, name)
assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
self.name = 'periodic_exp'
self.input_dim = input_dim
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
@ -38,7 +39,11 @@ class PeriodicExponential(Kernpart):
self.num_params = 3
self.n_freq = n_freq
self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period)))
self.variance = Param('variance', variance)
self.lengthscale = Param('lengthscale', lengthscale)
self.period = Param('period', period)
self.parameters_changed()
#self._set_params(np.hstack((variance,lengthscale,period)))
def _cos(self,alpha,omega,phase):
def f(x):
@ -61,30 +66,25 @@ class PeriodicExponential(Kernpart):
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
#def _get_params(self):
# """return the value of the parameters."""
# return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
def parameters_changed(self):
"""set the value of the parameters."""
assert x.size==3
self.variance = x[0]
self.lengthscale = x[1]
self.period = x[2]
self.a = [1./self.lengthscale, 1.]
self.b = [1]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))[:,0]
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def _get_param_names(self):
"""return parameter names."""
return ['variance','lengthscale','period']
#def _get_param_names(self):
# """return parameter names."""
# return ['variance','lengthscale','period']
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
@ -110,7 +110,7 @@ class PeriodicExponential(Kernpart):
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)

View file

@ -86,7 +86,7 @@ class POLY(Kernpart):
self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
@ -99,7 +99,7 @@ class POLY(Kernpart):
target[2] += base_cov_grad.sum()
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg

View file

@ -19,36 +19,21 @@ class Prod(Kernpart):
"""
def __init__(self,k1,k2,tensor=False,name="product"):
if tensor:
super(Prod, self).__init__(k1.input_dim + k2.input_dim, name)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to sum don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, name)
#self.num_params = k1.num_params + k2.num_params
self.k1 = k1
self.k2 = k2
if tensor:
self.slice1 = slice(0,self.k1.input_dim)
self.slice2 = slice(self.k1.input_dim,self.k1.input_dim+self.k2.input_dim)
super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
self.slice1 = slice(0,self.input_dim)
self.slice2 = slice(0,self.input_dim)
self._X, self._X2 = np.empty(shape=(2,1))
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
# self._set_params(np.hstack((k1._get_params(),k2._get_params())))
# def _get_params(self):
# """return the value of the parameters."""
# return np.hstack((self.k1._get_params(), self.k2._get_params()))
#
# def _set_params(self,x):
# """set the value of the parameters."""
# self.k1._set_params(x[:self.k1.num_params])
# self.k2._set_params(x[self.k1.num_params:])
#
# def _get_param_names(self):
# """return parameter names."""
# return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
#initialize cache
self._X, self._X2 = np.empty(shape=(2,1))
self._params = None
def K(self,X,X2,target):
self._K_computations(X,X2)
@ -64,15 +49,20 @@ class Prod(Kernpart):
self._K_computations(X, X2)
return self._K2
def dK_dtheta(self,dL_dK,X,X2,target):
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])
def _param_grad_helper(self,dL_dK,X,X2,target):
"""Derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2)
if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
@ -82,6 +72,7 @@ class Prod(Kernpart):
self.k2.Kdiag(X[:,self.slice2],target2)
target += target1 * target2
def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0])
@ -90,21 +81,21 @@ class Prod(Kernpart):
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
if X2 is None:
if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
#NOTE The indices column in the inputs makes the ki.dK_dX fail when passing None instead of X[:,self.slicei]
#NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
X2 = X
self.k1.dK_dX(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
else:
self.k1.dK_dX(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
@ -112,8 +103,8 @@ class Prod(Kernpart):
self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):

View file

@ -41,15 +41,15 @@ class prod_orthogonal(Kernpart):
self._K_computations(X,X2)
target += self._K1 * self._K2
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2)
if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
self.k1._param_grad_helper(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
@ -67,11 +67,11 @@ class prod_orthogonal(Kernpart):
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)
self.k1.gradients_X(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target)
self.k2.gradients_X(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target)
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
@ -79,8 +79,8 @@ class prod_orthogonal(Kernpart):
self.k1.Kdiag(X[:,0:self.k1.input_dim],K1)
self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)
self.k1.gradients_X(dL_dKdiag*K2, X[:,:self.k1.input_dim], target)
self.k2.gradients_X(dL_dKdiag*K1, X[:,self.k1.input_dim:], target)
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):

View file

@ -52,7 +52,7 @@ class RationalQuadratic(Kernpart):
def Kdiag(self,X,target):
target += self.variance
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
if X2 is None: X2 = X
dist2 = np.square((X-X2.T)/self.lengthscale)
@ -68,7 +68,7 @@ class RationalQuadratic(Kernpart):
target[0] += np.sum(dL_dKdiag)
# here self.lengthscale and self.power have no influence on Kdiag so target[1:] are unchanged
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None:
dist2 = np.square((X-X.T)/self.lengthscale)

View file

@ -8,6 +8,7 @@ from kernpart import Kernpart
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class RBF(Kernpart):
"""
@ -50,9 +51,12 @@ class RBF(Kernpart):
else:
lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance)
self.variance = Param('variance', variance, Logexp())
self.lengthscale = Param('lengthscale', lengthscale)
self.lengthscale.add_observer(self, self.update_lengthscale)
self.update_lengthscale(self.lengthscale)
self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache
@ -114,16 +118,16 @@ class RBF(Kernpart):
self._K_computations(X, Z)
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
#from Kmm
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
@ -138,7 +142,7 @@ class RBF(Kernpart):
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
self.lengthscale.gradeint = dpsi1_dlength.sum()
self.lengthscale.gradient = dpsi1_dlength.sum()
else:
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
@ -157,9 +161,9 @@ class RBF(Kernpart):
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def gradients_X(self, dL_dK, X, X2, target):
#if self._X is None or X.base is not self._X.base or X2 is not None:
@ -168,8 +172,8 @@ class RBF(Kernpart):
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass

View file

@ -97,7 +97,7 @@ class RBFInv(RBF):
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
def dK_dtheta(self, dL_dK, X, X2, target):
def _param_grad_helper(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
target[0] += np.sum(self._K_dvar * dL_dK)
if self.ARD:
@ -142,14 +142,14 @@ class RBFInv(RBF):
else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
def dK_dX(self, dL_dK, X, X2, target):
def gradients_X(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass

View file

@ -73,7 +73,7 @@ class RBFCos(Kernpart):
def Kdiag(self,X,target):
np.add(target,self.variance,target)
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
self._K_computations(X,X2)
target[0] += np.sum(dL_dK*self._dvar)
if self.ARD:
@ -88,7 +88,7 @@ class RBFCos(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target[0] += np.sum(dL_dKdiag)
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
#TODO!!!
raise NotImplementedError

View file

@ -50,7 +50,7 @@ class Spline(Kernpart):
def Kdiag(self,X,target):
target += self.variance*X.flatten()**3/3.
def dK_dtheta(self,X,X2,target):
def _param_grad_helper(self,X,X2,target):
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
def dKdiag_dtheta(self,X,target):

352
GPy/kern/parts/ss_rbf.py Normal file
View file

@ -0,0 +1,352 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kernpart import Kernpart
from ...util.linalg import tdot
from ...util.misc import fast_array_equal, param_to_array
from ...core.parameterization import Param
class SS_RBF(Kernpart):
"""
The RBF kernel for Spike-and-Slab GPLVM
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:rtype: kernel object
"""
def __init__(self, input_dim, variance=1., lengthscale=None, name='rbf'):
super(RBF, self).__init__(input_dim, name)
self.input_dim = input_dim
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance)
self.lengthscale = Param('lengthscale', lengthscale)
self.lengthscale.add_observer(self, self.update_lengthscale)
self.add_parameters(self.variance, self.lengthscale)
self.parameters_changed() # initializes cache
def on_input_change(self, X):
#self._K_computations(X, None)
pass
def update_lengthscale(self, l):
self.lengthscale2 = np.square(self.lengthscale)
def parameters_changed(self):
# reset cached results
self._X, self._X2 = np.empty(shape=(2, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def K(self, X, X2, target):
self._K_computations(X, X2)
target += self.variance * self._K_dvar
def Kdiag(self, X, target):
np.add(target, self.variance, target)
def psi0(self, Z, mu, S, target):
target += self.variance
def psi1(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi1
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi2
def update_gradients_full(self, dL_dK, X):
self._K_computations(X, None)
self.variance.gradient = np.sum(self._K_dvar * dL_dK)
if self.ARD:
self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dK, X, None)
else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
#contributions from Kdiag
self.variance.gradient = np.sum(dL_dKdiag)
#from Knm
self._K_computations(X, Z)
self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
else:
self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
#from Kmm
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
self._psi_computations(Z, mu, S)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance)
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
self.lengthscale.gradeint = dpsi1_dlength.sum()
else:
self.lengthscale.gradient = dpsi1_dlength.sum(0).sum(0)
#from psi2
d_var = 2.*self._psi2 / self.variance
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
self.variance.gradient += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
self.lengthscale.gradient += dpsi2_dlength.sum()
else:
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
#from Kmm
self._K_computations(Z, None)
self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
if self.ARD:
self.lengthscales.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
else:
self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
def gradients_X(self, dL_dK, X, X2, target):
#if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
#params = self._get_params()
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):# and fast_array_equal(self._params_save , params)):
#self._X = X.copy()
#self._params_save = params.copy()
if X2 is None:
self._X2 = None
X = X / self.lengthscale
Xsquare = np.sum(np.square(X), 1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X / self.lengthscale
X2 = X2 / self.lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
def _dL_dlengthscales_via_K(self, dL_dK, X, X2):
"""
A helper function for update_gradients_* methods
Computes the derivative of the objective L wrt the lengthscales via
dL_dl = sum_{i,j}(dL_dK_{ij} dK_dl)
assumes self._K_computations has just been called.
This is only valid if self.ARD=True
"""
target = np.zeros(self.input_dim)
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
X, dvardLdK = param_to_array(X, dvardLdK)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
X, X2, dvardLdK = param_to_array(X, X2, dvardLdK)
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], type_converters=weave.converters.blitz, **self.weave_options)
return target
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
Z_changed = not fast_array_equal(Z, self._Z)
if Z_changed:
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
# something's changed. recompute EVERYTHING
# psi1
self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
lengthscale2 = self.lengthscale2
else:
lengthscale2 = np.ones(input_dim) * self.lengthscale2
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2

View file

@ -40,7 +40,7 @@ class Symmetric(Kernpart):
self.k.K(X,AX2,target)
self.k.K(AX,AX2,target)
def dK_dtheta(self,dL_dK,X,X2,target):
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
AX = np.dot(X,self.transform)
if X2 is None:
@ -48,13 +48,13 @@ class Symmetric(Kernpart):
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.dK_dtheta(dL_dK,X,X2,target)
self.k.dK_dtheta(dL_dK,AX,X2,target)
self.k.dK_dtheta(dL_dK,X,AX2,target)
self.k.dK_dtheta(dL_dK,AX,AX2,target)
self.k._param_grad_helper(dL_dK,X,X2,target)
self.k._param_grad_helper(dL_dK,AX,X2,target)
self.k._param_grad_helper(dL_dK,X,AX2,target)
self.k._param_grad_helper(dL_dK,AX,AX2,target)
def dK_dX(self,dL_dK,X,X2,target):
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform)
if X2 is None:
@ -62,10 +62,10 @@ class Symmetric(Kernpart):
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.dK_dX(dL_dK, X, X2, target)
self.k.dK_dX(dL_dK, AX, X2, target)
self.k.dK_dX(dL_dK, X, AX2, target)
self.k.dK_dX(dL_dK, AX ,AX2, target)
self.k.gradients_X(dL_dK, X, X2, target)
self.k.gradients_X(dL_dK, AX, X2, target)
self.k.gradients_X(dL_dK, X, AX2, target)
self.k.gradients_X(dL_dK, AX ,AX2, target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""

View file

@ -348,7 +348,7 @@ class spkern(Kernpart):
def Kdiag(self,X,target):
self._weave_inline(self._Kdiag_code, X, target)
def dK_dtheta(self,partial,X,Z,target):
def _param_grad_helper(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
else:
@ -357,7 +357,7 @@ class spkern(Kernpart):
def dKdiag_dtheta(self,partial,X,target):
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
def dK_dX(self,partial,X,Z,target):
def gradients_X(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
else:

View file

@ -4,6 +4,7 @@
from kernpart import Kernpart
import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class White(Kernpart):
"""
@ -17,7 +18,7 @@ class White(Kernpart):
def __init__(self,input_dim,variance=1.):
super(White, self).__init__(input_dim, 'white')
self.input_dim = input_dim
self.variance = Param('variance', variance)
self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance)
self._psi1 = 0 # TODO: more elegance here
@ -32,7 +33,7 @@ class White(Kernpart):
self.variance.gradient = np.trace(dL_dK)
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
raise NotImplementedError
self.variance.gradient = np.trace(dL_dKmm) + np.sum(dL_dKdiag)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
raise NotImplementedError

View file

@ -78,7 +78,7 @@ class Bernoulli(Likelihood):
return Z_hat, mu_hat, sigma2_hat
def _predictive_mean_analytical(self, mu, variance):
def predictive_mean(self, mu, variance):
if isinstance(self.gp_link, link_functions.Probit):
return stats.norm.cdf(mu/np.sqrt(1+variance))
@ -89,12 +89,13 @@ class Bernoulli(Likelihood):
else:
raise NotImplementedError
def _predictive_variance_analytical(self, mu, variance, pred_mean):
def predictive_variance(self, mu, variance, pred_mean):
if isinstance(self.gp_link, link_functions.Heaviside):
return 0.
else:
raise NotImplementedError
return np.nan
#raise NotImplementedError
def pdf_link(self, link_f, y, extra_data=None):
"""

View file

@ -1,11 +1,10 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#TODO
"""
A lot of this code assumes that the link functio nis the identity.
A lot of this code assumes that the link function is the identity.
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
Furthermore, exact Guassian inference can only be done for the identity link, so we should be asserting so for all calls which relate to that.
@ -18,6 +17,7 @@ from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from likelihood import Likelihood
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
class Gaussian(Likelihood):
"""
@ -43,7 +43,7 @@ class Gaussian(Likelihood):
super(Gaussian, self).__init__(gp_link, name=name)
self.variance = Param('variance', variance)
self.variance = Param('variance', variance, Logexp())
self.add_parameter(self.variance)
if isinstance(gp_link, link_functions.Identity):
@ -130,7 +130,10 @@ class Gaussian(Likelihood):
:rtype: float
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
return -0.5*(np.sum((y-link_f)**2/self.variance) + self.ln_det_K + self.N*np.log(2.*np.pi))
N = y.shape[0]
ln_det_cov = N*np.log(self.variance)
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, extra_data=None):
"""
@ -175,7 +178,8 @@ class Gaussian(Likelihood):
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
hess = -(1.0/self.variance)*np.ones((self.N, 1))
N = y.shape[0]
hess = -(1.0/self.variance)*np.ones((N, 1))
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
@ -194,7 +198,8 @@ class Gaussian(Likelihood):
:rtype: Nx1 array
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
d3logpdf_dlink3 = np.diagonal(0*self.I)[:, None]
N = y.shape[0]
d3logpdf_dlink3 = np.zeros((N,1))
return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
@ -215,7 +220,8 @@ class Gaussian(Likelihood):
assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f
s_4 = 1.0/(self.variance**2)
dlik_dsigma = -0.5*self.N/self.variance + 0.5*s_4*np.sum(np.square(e))
N = y.shape[0]
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
return np.sum(dlik_dsigma) # Sure about this sum?
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
@ -255,7 +261,8 @@ class Gaussian(Likelihood):
"""
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2)
d2logpdf_dlink2_dvar = np.diag(s_4*self.I)[:, None]
N = y.shape[0]
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):

View file

@ -13,12 +13,12 @@ from ..core.parameterization import Parameterized
class Likelihood(Parameterized):
"""
Likelihood base class, used to defing p(y|f).
Likelihood base class, used to defing p(y|f).
All instances use _inverse_ link functions, which can be swapped out. It is
expected that inherriting classes define a default inverse link function
To use this class, inherrit and define missing functionality.
To use this class, inherrit and define missing functionality.
Inherriting classes *must* implement:
pdf_link : a bound method which turns the output of the link function into the pdf
@ -27,7 +27,7 @@ class Likelihood(Parameterized):
To enable use with EP, inherriting classes *must* define:
TODO: a suitable derivative function for any parameters of the class
It is also desirable to define:
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
moments_match_ep : a function to compute the EP moments If this isn't defined, the moments will be computed using 1D quadrature.
To enable use with Laplace approximation, inherriting classes *must* define:
Some derivative functions *AS TODO*
@ -36,7 +36,7 @@ class Likelihood(Parameterized):
"""
def __init__(self, gp_link, name):
super(Likelihood, self).__init__(name)
super(Likelihood, self).__init__(name)
assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
self.gp_link = gp_link
self.log_concave = False
@ -44,6 +44,10 @@ class Likelihood(Parameterized):
def _gradients(self,partial):
return np.zeros(0)
def update_gradients(self, partial):
if self.size > 0:
raise NotImplementedError('Must be implemented for likelihoods with parameters to be optimized')
def _preprocess_values(self,Y):
"""
In case it is needed, this function assess the output values or makes any pertinent transformation on them.
@ -303,31 +307,31 @@ class Likelihood(Parameterized):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
if self.size > 0:
link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([1, 0])
return np.zeros([1, 0])
def dlogpdf_df_dtheta(self, f, y, extra_data=None):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
if self.size > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([f.shape[0], 0])
return np.zeros([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
"""
TODO: Doc strings
"""
if len(self._get_param_names()) > 0:
if self.size > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f)
@ -336,7 +340,7 @@ class Likelihood(Parameterized):
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else:
#Is no parameters so return an empty array for its derivatives
return np.empty([f.shape[0], 0])
return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, extra_data=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
@ -345,9 +349,12 @@ class Likelihood(Parameterized):
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
assert dlogpdf_dtheta.shape[1] == len(self._get_param_names())
assert dlogpdf_df_dtheta.shape[1] == len(self._get_param_names())
assert d2logpdf_df2_dtheta.shape[1] == len(self._get_param_names())
try:
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
except Exception as e:
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

View file

@ -19,8 +19,11 @@ class Poisson(Likelihood):
.. Note::
Y is expected to take values in {0,1,2,...}
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Poisson, self).__init__(gp_link,analytical_mean,analytical_variance)
def __init__(self, gp_link=None):
if gp_link is None:
gp_link = link_functions.Log_ex_1()
super(Poisson, self).__init__(gp_link, name='Poisson')
def _preprocess_values(self,Y):
return Y

View file

@ -8,6 +8,7 @@ import link_functions
from scipy import stats, integrate
from scipy.special import gammaln, gamma
from likelihood import Likelihood
from ..core.parameterization import Param
class StudentT(Likelihood):
"""
@ -19,26 +20,30 @@ class StudentT(Likelihood):
p(y_{i}|\\lambda(f_{i})) = \\frac{\\Gamma\\left(\\frac{v+1}{2}\\right)}{\\Gamma\\left(\\frac{v}{2}\\right)\\sqrt{v\\pi\\sigma^{2}}}\\left(1 + \\frac{1}{v}\\left(\\frac{(y_{i} - f_{i})^{2}}{\\sigma^{2}}\\right)\\right)^{\\frac{-v+1}{2}}
"""
def __init__(self,gp_link=None,analytical_mean=True,analytical_variance=True, deg_free=5, sigma2=2):
self.v = deg_free
self.sigma2 = sigma2
def __init__(self,gp_link=None, deg_free=5, sigma2=2):
if gp_link is None:
gp_link = link_functions.Identity()
super(StudentT, self).__init__(gp_link, name='Student_T')
self.sigma2 = Param('t_noise', float(sigma2))
self.v = Param('deg_free', float(deg_free))
self.add_parameter(self.sigma2)
self.add_parameter(self.v)
self.v.constrain_fixed()
self._set_params(np.asarray(sigma2))
super(StudentT, self).__init__(gp_link,analytical_mean,analytical_variance)
self.log_concave = False
def _get_params(self):
return np.asarray(self.sigma2)
def parameters_changed(self):
self.variance = (self.v / float(self.v - 2)) * self.sigma2
def _get_param_names(self):
return ["t_noise_std2"]
def _set_params(self, x):
self.sigma2 = float(x)
@property
def variance(self, extra_data=None):
return (self.v / float(self.v - 2)) * self.sigma2
def update_gradients(self, partial):
"""
Pull out the gradients, be careful as the order must match the order
in which the parameters are added
"""
self.sigma2.gradient = partial[0]
self.v.gradient = partial[1]
def pdf_link(self, link_f, y, extra_data=None):
"""
@ -82,10 +87,14 @@ class StudentT(Likelihood):
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
e = y - link_f
#FIXME:
#Why does np.log(1 + (1/self.v)*((y-link_f)**2)/self.sigma2) suppress the divide by zero?!
#But np.log(1 + (1/float(self.v))*((y-link_f)**2)/self.sigma2) throws it correctly
#print - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
objective = (+ gammaln((self.v + 1) * 0.5)
- gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
- gammaln(self.v * 0.5)
- 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
)
return np.sum(objective)
@ -222,17 +231,20 @@ class StudentT(Likelihood):
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
return np.asarray([[dlogpdf_dvar]])
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
return dlogpdf_dlink_dvar
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
return d2logpdf_dlink2_dvar
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def _predictive_variance_analytical(self, mu, sigma, predictive_mean=None):
def predictive_variance(self, mu, sigma, predictive_mean=None):
"""
Compute predictive variance of student_t*normal p(y*|f*)p(f*)
@ -252,7 +264,7 @@ class StudentT(Likelihood):
return true_var
def _predictive_mean_analytical(self, mu, sigma):
def predictive_mean(self, mu, sigma):
"""
Compute mean of the prediction
"""

View file

@ -57,4 +57,4 @@ class Kernel(Mapping):
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def df_dX(self, dL_df, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)

View file

@ -3,7 +3,7 @@
from gp_regression import GPRegression
from gp_classification import GPClassification
from sparse_gp_regression import SparseGPRegression
from sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput
from svigp_regression import SVIGPRegression
from sparse_gp_classification import SparseGPClassification
from gplvm import GPLVM

View file

@ -2,7 +2,6 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import itertools
from gplvm import GPLVM
from .. import kern
from ..core import SparseGP
@ -23,15 +22,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
:type init: 'PCA'|'random'
"""
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, name='bayesian gplvm', **kwargs):
if type(likelihood_or_Y) is np.ndarray:
likelihood = Gaussian(likelihood_or_Y)
else:
likelihood = likelihood_or_Y
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=Gaussian(), name='bayesian gplvm', **kwargs):
if X == None:
X = self.initialise_latent(init, input_dim, likelihood.Y)
X = self.initialise_latent(init, input_dim, Y)
self.init = init
if X_variance is None:
@ -44,10 +38,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
if kernel is None:
kernel = kern.rbf(input_dim) # + kern.white(input_dim)
SparseGP.__init__(self, X=X, likelihood=likelihood, kernel=kernel, Z=Z, X_variance=X_variance, name=name, **kwargs)
self.q = Normal(self.X, self.X_variance)
self.add_parameter(self.q, gradient=self._dbound_dmuS, index=0)
self.ensure_default_constraints()
self.q = Normal(X, X_variance)
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, X_variance, name, **kwargs)
self.add_parameter(self.q, index=0)
#self.ensure_default_constraints()
def _getstate(self):
"""
@ -61,42 +55,10 @@ class BayesianGPLVM(SparseGP, GPLVM):
self.init = state.pop()
SparseGP._setstate(self, state)
# def _get_param_names(self):
# X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
# S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
# return (X_names + S_names + SparseGP._get_param_names(self))
#def _get_print_names(self):
# return SparseGP._get_print_names(self)
# def _get_params(self):
# """
# Horizontally stacks the parameters in order to present them to the optimizer.
# The resulting 1-input_dim array has this structure:
#
# ===============================================================
# | mu | S | Z | theta | beta |
# ===============================================================
#
# """
# x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self)))
# return x
# def _set_params(self, x, save_old=True, save_count=0):
# N, input_dim = self.num_data, self.input_dim
# self.X = x[:self.X.size].reshape(N, input_dim).copy()
# self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
# SparseGP._set_params(self, x[(2 * N * input_dim):])
def dKL_dmuS(self):
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
dKL_dmu = self.X
return dKL_dmu, dKL_dS
def dL_dmuS(self):
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.dL_dpsi2, self.Z, self.X, self.X_variance)
dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
@ -107,31 +69,25 @@ class BayesianGPLVM(SparseGP, GPLVM):
var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
def log_likelihood(self):
ll = SparseGP.log_likelihood(self)
kl = self.KL_divergence()
return ll - kl
def parameters_changed(self):
super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.KL_divergence()
def _dbound_dmuS(self):
dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS()
d_dmu = (dL_dmu - dKL_dmu).flatten()
d_dS = (dL_dS - dKL_dS).flatten()
return np.hstack((d_dmu, d_dS))
# def _log_likelihood_gradients(self):
# dKL_dmu, dKL_dS = self.dKL_dmuS()
# dL_dmu, dL_dS = self.dL_dmuS()
# d_dmu = (dL_dmu - dKL_dmu).flatten()
# d_dS = (dL_dS - dKL_dS).flatten()
# self.dbound_dmuS = np.hstack((d_dmu, d_dS))
# self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self)
# return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
# dL:
self.q.mean.gradient = dL_dmu
self.q.variance.gradient = dL_dS
# dKL:
self.q.mean.gradient -= self.X
self.q.variance.gradient -= (1. - (1. / (self.X_variance))) * 0.5
def plot_latent(self, plot_inducing=True, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.dim_reduction_plots.plot_latent
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
@ -181,18 +137,18 @@ class BayesianGPLVM(SparseGP, GPLVM):
"""
dmu_dX = np.zeros_like(Xnew)
for i in range(self.Z.shape[0]):
dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
dmu_dX += self.kern.gradients_X(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX
def dmu_dXnew(self, Xnew):
"""
Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
"""
dK_dX = np.zeros((Xnew.shape[0], self.num_inducing))
gradients_X = np.zeros((Xnew.shape[0], self.num_inducing))
ones = np.ones((1, 1))
for i in range(self.Z.shape[0]):
dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(dK_dX, self.Cpsi1Vf)
gradients_X[:, i] = self.kern.gradients_X(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(gradients_X, self.Cpsi1Vf)
def plot_steepest_gradient_map(self, *args, ** kwargs):
"""
@ -201,7 +157,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_steepest_gradient_map(model,*args,**kwargs)
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""

View file

@ -44,7 +44,7 @@ class BCGPLVM(GPLVM):
GP._set_params(self, x[self.mapping.num_params:])
def _log_likelihood_gradients(self):
dL_df = self.kern.dK_dX(self.dL_dK, self.X)
dL_df = self.kern.gradients_X(self.dL_dK, self.X)
dL_dtheta = self.mapping.df_dtheta(dL_df, self.likelihood.Y)
return np.hstack((dL_dtheta.flatten(), GP._log_likelihood_gradients(self)))

View file

@ -60,7 +60,7 @@ class GPLVM(GP):
def jacobian(self,X):
target = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim):
target[:,:,i]=self.kern.dK_dX(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
target[:,:,i]=self.kern.gradients_X(np.dot(self.Ki,self.likelihood.Y[:,i])[None, :],X,self.X)
return target
def magnification(self,X):

View file

@ -1,9 +1,10 @@
# ## Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from GPy.core.model import Model
from ..core.model import Model
import itertools
import numpy
from ..core.parameterization import Param
def get_shape(x):
if isinstance(x, numpy.ndarray):
@ -24,42 +25,42 @@ class GradientChecker(Model):
"""
:param f: Function to check gradient for
:param df: Gradient of function to check
:param x0:
:param x0:
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
Can be a list of arrays, if takes a list of arrays. This list will be passed
Can be a list of arrays, if takes a list of arrays. This list will be passed
to f and df in the same order as given here.
If only one argument, make sure not to pass a list!!!
:type x0: [array-like] | array-like | float | int
:param names:
Names to print, when performing gradcheck. If a list was passed to x0
a list of names with the same length is expected.
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
Examples:
---------
from GPy.models import GradientChecker
N, M, Q = 10, 5, 3
Sinusoid:
X = numpy.random.rand(N, Q)
grad = GradientChecker(numpy.sin,numpy.cos,X,'x')
grad.checkgrad(verbose=1)
Using GPy:
X, Z = numpy.random.randn(N,Q), numpy.random.randn(M,Q)
kern = GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True)
grad = GradientChecker(kern.K,
grad = GradientChecker(kern.K,
lambda x: 2*kern.dK_dX(numpy.ones((1,1)), x),
x0 = X.copy(),
names='X')
names='X')
grad.checkgrad(verbose=1)
grad.randomize()
grad.checkgrad(verbose=1)
grad.checkgrad(verbose=1)
"""
Model.__init__(self)
Model.__init__(self, 'GradientChecker')
if isinstance(x0, (list, tuple)) and names is None:
self.shapes = [get_shape(xi) for xi in x0]
self.names = ['X{i}'.format(i=i) for i in range(len(x0))]
@ -72,8 +73,10 @@ class GradientChecker(Model):
else:
self.names = names
self.shapes = [get_shape(x0)]
for name, xi in zip(self.names, at_least_one_element(x0)):
self.__setattr__(name, xi)
self.__setattr__(name, Param(name, xi))
self.add_parameter(self.__getattribute__(name))
# self._param_names = []
# for name, shape in zip(self.names, self.shapes):
# self._param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
@ -93,20 +96,18 @@ class GradientChecker(Model):
def _log_likelihood_gradients(self):
return numpy.atleast_1d(self.df(*self._get_x(), **self.kwargs)).flatten()
#def _get_params(self):
#return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
def _get_params(self):
return numpy.atleast_1d(numpy.hstack(map(lambda name: flatten_if_needed(self.__getattribute__(name)), self.names)))
#def _set_params(self, x):
#current_index = 0
#for name, shape in zip(self.names, self.shapes):
#current_size = numpy.prod(shape)
#self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
#current_index += current_size
def _set_params(self, x):
current_index = 0
for name, shape in zip(self.names, self.shapes):
current_size = numpy.prod(shape)
self.__setattr__(name, x[current_index:current_index + current_size].reshape(shape))
current_index += current_size
def _get_param_names(self):
_param_names = []
for name, shape in zip(self.names, self.shapes):
_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
return _param_names
#def _get_param_names(self):
#_param_names = []
#for name, shape in zip(self.names, self.shapes):
#_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
#return _param_names

View file

@ -16,44 +16,83 @@ class SparseGPRegression(SparseGP):
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing: number of inducing points (ignored if Z is passed, see note)
:type num_inducing: int
:rtype: model object
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None
.. Note:: If no Z array is passed, num_inducing (default 10) points are selected from the data. Other wise num_inducing is ignored
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10, X_variance=None):
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None):
num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1]) # + kern.white(X.shape[1], 1e-3)
kernel = kern.rbf(input_dim)# + kern.white(input_dim, variance=1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(X.shape[0])[:num_inducing]
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
Z = X[i].copy()
else:
assert Z.shape[1] == X.shape[1]
assert Z.shape[1] == input_dim
# likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
likelihood = likelihoods.Gaussian()
SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
self.ensure_default_constraints()
pass
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance)
#self.ensure_default_constraints()
def _getstate(self):
return SparseGP._getstate(self)
def _setstate(self, state):
return SparseGP._setstate(self, state)
pass
class SparseGPRegressionUncertainInput(SparseGP):
"""
Gaussian Process model for regression with Gaussian variance on the inputs (X_variance)
This is a thin wrapper around the SparseGP class, with a set of sensible defalts
"""
def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10):
"""
:param X: input observations
:type X: np.ndarray (num_data x input_dim)
:param X_variance: The uncertainty in the measurements of X (Gaussian variance, optional)
:type X_variance: np.ndarray (num_data x input_dim)
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing: number of inducing points (ignored if Z is passed, see note)
:type num_inducing: int
:rtype: model object
.. Note:: If no Z array is passed, num_inducing (default 10) points are selected from the data. Other wise num_inducing is ignored
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(input_dim) + kern.white(input_dim, variance=1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
Z = X[i].copy()
else:
assert Z.shape[1] == input_dim
likelihood = likelihoods.Gaussian()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance)
self.ensure_default_constraints()

View file

@ -52,7 +52,7 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1, self.X, self.Z)
dL_dX += self.kern.gradients_X(self.dL_dpsi1, self.X, self.Z)
return dL_dX

View file

@ -7,6 +7,7 @@ import pylab as pb
import Tango
from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy
from ...kern.parts.linear import Linear
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):

View file

@ -5,6 +5,7 @@ import pylab as pb
import numpy as np
import Tango
from base_plots import gpplot, x_frame1D, x_frame2D
from ...util.misc import param_to_array
def plot_fit(model, plot_limits=None, which_data_rows='all',
@ -95,8 +96,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#add inducing inputs (if a sparse model is used)
if hasattr(model,"Z"):
Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
Zu = param_to_array(model.Z[:,free_dims])
z_height = ax.get_ylim()[0]
ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
#add error bars for uncertain (if input uncertainty is being modelled)
if hasattr(model,"has_uncertain_inputs"):
@ -144,18 +148,19 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#add inducing inputs (if a sparse model is used)
if hasattr(model,"Z"):
Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
Zu = model.Z[:,free_dims]
ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot_f_fit(model, *args, **kwargs):
def plot_fit_f(model, *args, **kwargs):
"""
Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
All args and kwargs are passed on to models_plots.plot.
"""
kwargs['plot_raw'] = True
plot(model,*args, **kwargs)
plot_fit(model,*args, **kwargs)

View file

@ -1,4 +1,5 @@
import pylab as pb
import pylab as pb, numpy as np
from ...util.misc import param_to_array
def plot(parameterized, fignum=None, ax=None, colors=None):
"""
@ -13,14 +14,14 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
"""
if ax is None:
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.means.shape[1]))))
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
if colors is None:
colors = pb.gca()._get_lines.color_cycle
pb.clf()
else:
colors = iter(colors)
plots = []
means, variances = param_to_array(parameterized.means, parameterized.variances)
means, variances = param_to_array(parameterized.mean, parameterized.variance)
x = np.arange(means.shape[0])
for i in range(means.shape[1]):
if ax is None:

View file

@ -5,7 +5,7 @@ import unittest
import numpy as np
import GPy
verbose = False
verbose = True
try:
import sympy

View file

@ -4,10 +4,11 @@ import GPy
from GPy.models import GradientChecker
import functools
import inspect
from GPy.likelihoods.noise_models import gp_transformations
from GPy.likelihoods import link_functions
from ..core.parameterization import Param
from functools import partial
#np.random.seed(300)
np.random.seed(7)
#np.random.seed(7)
def dparam_partial(inst_func, *args):
"""
@ -22,12 +23,14 @@ def dparam_partial(inst_func, *args):
the f or Y that are being used in the function whilst we tweak the
param
"""
def param_func(param, inst_func, args):
inst_func.im_self._set_params(param)
def param_func(param_val, param_name, inst_func, args):
#inst_func.im_self._set_params(param)
#inst_func.im_self.add_parameter(Param(param_name, param_val))
inst_func.im_self[param_name] = param_val
return inst_func(*args)
return functools.partial(param_func, inst_func=inst_func, args=args)
def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=False, verbose=False):
def dparam_checkgrad(func, dfunc, params, params_names, args, constraints=None, randomize=False, verbose=False):
"""
checkgrad expects a f: R^N -> R^1 and df: R^N -> R^N
However if we are holding other parameters fixed and moving something else
@ -38,27 +41,34 @@ def dparam_checkgrad(func, dfunc, params, args, constraints=None, randomize=Fals
The number of parameters and N is the number of data
Need to take a slice out from f and a slice out of df
"""
#print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
#func.__name__, dfunc.__name__)
print "\n{} likelihood: {} vs {}".format(func.im_self.__class__.__name__,
func.__name__, dfunc.__name__)
partial_f = dparam_partial(func, *args)
partial_df = dparam_partial(dfunc, *args)
gradchecking = True
for param in params:
fnum = np.atleast_1d(partial_f(param)).shape[0]
dfnum = np.atleast_1d(partial_df(param)).shape[0]
zipped_params = zip(params, params_names)
for param_ind, (param_val, param_name) in enumerate(zipped_params):
#Check one parameter at a time, make sure it is 2d (as some gradients only return arrays) then strip out the parameter
fnum = np.atleast_2d(partial_f(param_val, param_name))[:, param_ind].shape[0]
dfnum = np.atleast_2d(partial_df(param_val, param_name))[:, param_ind].shape[0]
for fixed_val in range(dfnum):
#dlik and dlik_dvar gives back 1 value for each
f_ind = min(fnum, fixed_val+1) - 1
print "fnum: {} dfnum: {} f_ind: {} fixed_val: {}".format(fnum, dfnum, f_ind, fixed_val)
#Make grad checker with this param moving, note that set_params is NOT being called
#The parameter is being set directly with __setattr__
grad = GradientChecker(lambda x: np.atleast_1d(partial_f(x))[f_ind],
lambda x : np.atleast_1d(partial_df(x))[fixed_val],
param, 'p')
#This is not general for more than one param...
#Check only the parameter and function value we wish to check at a time
grad = GradientChecker(lambda p_val: np.atleast_2d(partial_f(p_val, param_name))[f_ind, param_ind],
lambda p_val: np.atleast_2d(partial_df(p_val, param_name))[fixed_val, param_ind],
param_val, [param_name])
if constraints is not None:
for constraint in constraints:
constraint('p', grad)
for constrain_param, constraint in constraints:
if grad.grep_param_names(constrain_param):
constraint(constrain_param, grad)
else:
print "parameter didn't exist"
print constrain_param, " ", constraint
if randomize:
grad.randomize()
if verbose:
@ -76,7 +86,7 @@ class TestNoiseModels(object):
Generic model checker
"""
def setUp(self):
self.N = 5
self.N = 15
self.D = 3
self.X = np.random.rand(self.N, self.D)*10
@ -94,7 +104,7 @@ class TestNoiseModels(object):
self.var = np.random.rand(1)
#Make a bigger step as lower bound can be quite curved
self.step = 1e-3
self.step = 1e-4
def tearDown(self):
self.Y = None
@ -107,17 +117,20 @@ class TestNoiseModels(object):
####################################################
# Constraint wrappers so we can just list them off #
####################################################
def constrain_fixed(regex, model):
model[regex].constrain_fixed()
def constrain_negative(regex, model):
model.constrain_negative(regex)
model[regex].constrain_negative()
def constrain_positive(regex, model):
model.constrain_positive(regex)
model[regex].constrain_positive()
def constrain_bounded(regex, model, lower, upper):
"""
Used like: partial(constrain_bounded, lower=0, upper=1)
"""
model.constrain_bounded(regex, lower, upper)
model[regex].constrain_bounded(lower, upper)
"""
Dictionary where we nest models we would like to check
@ -134,71 +147,81 @@ class TestNoiseModels(object):
}
"""
noise_models = {"Student_t_default": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [self.var],
"constraints": [constrain_positive]
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
#"constraints": [("t_noise", constrain_positive), ("deg_free", partial(constrain_fixed, value=5))]
},
"laplace": True
},
"Student_t_1_var": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [1.0],
"constraints": [constrain_positive]
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_small_deg_free": {
"model": GPy.likelihoods.StudentT(deg_free=1.5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [self.var],
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_small_var": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [0.01],
"constraints": [constrain_positive]
"vals": [0.0001],
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_large_var": {
"model": GPy.likelihoods.student_t(deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [10.0],
"constraints": [constrain_positive]
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_approx_gauss": {
"model": GPy.likelihoods.student_t(deg_free=1000, sigma2=self.var),
"model": GPy.likelihoods.StudentT(deg_free=1000, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [self.var],
"constraints": [constrain_positive]
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
},
"laplace": True
},
"Student_t_log": {
"model": GPy.likelihoods.student_t(gp_link=gp_transformations.Log(), deg_free=5, sigma2=self.var),
"model": GPy.likelihoods.StudentT(gp_link=link_functions.Log(), deg_free=5, sigma2=self.var),
"grad_params": {
"names": ["t_noise"],
"vals": [self.var],
"constraints": [constrain_positive]
"constraints": [("t_noise", constrain_positive), ("deg_free", constrain_fixed)]
},
"laplace": True
},
"Gaussian_default": {
"model": GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N),
"model": GPy.likelihoods.Gaussian(variance=self.var),
"grad_params": {
"names": ["noise_model_variance"],
"names": ["variance"],
"vals": [self.var],
"constraints": [constrain_positive]
"constraints": [("variance", constrain_positive)]
},
"laplace": True,
"ep": True
},
#"Gaussian_log": {
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log(), variance=self.var, D=self.D, N=self.N),
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log(), variance=self.var, D=self.D, N=self.N),
#"grad_params": {
#"names": ["noise_model_variance"],
#"vals": [self.var],
@ -207,7 +230,7 @@ class TestNoiseModels(object):
#"laplace": True
#},
#"Gaussian_probit": {
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Probit(), variance=self.var, D=self.D, N=self.N),
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Probit(), variance=self.var, D=self.D, N=self.N),
#"grad_params": {
#"names": ["noise_model_variance"],
#"vals": [self.var],
@ -216,7 +239,7 @@ class TestNoiseModels(object):
#"laplace": True
#},
#"Gaussian_log_ex": {
#"model": GPy.likelihoods.gaussian(gp_link=gp_transformations.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
#"model": GPy.likelihoods.gaussian(gp_link=link_functions.Log_ex_1(), variance=self.var, D=self.D, N=self.N),
#"grad_params": {
#"names": ["noise_model_variance"],
#"vals": [self.var],
@ -225,31 +248,31 @@ class TestNoiseModels(object):
#"laplace": True
#},
"Bernoulli_default": {
"model": GPy.likelihoods.bernoulli(),
"model": GPy.likelihoods.Bernoulli(),
"link_f_constraints": [partial(constrain_bounded, lower=0, upper=1)],
"laplace": True,
"Y": self.binary_Y,
"ep": True
},
"Exponential_default": {
"model": GPy.likelihoods.exponential(),
"link_f_constraints": [constrain_positive],
"Y": self.positive_Y,
"laplace": True,
},
"Poisson_default": {
"model": GPy.likelihoods.poisson(),
"link_f_constraints": [constrain_positive],
"Y": self.integer_Y,
"laplace": True,
"ep": False #Should work though...
},
"Gamma_default": {
"model": GPy.likelihoods.gamma(),
"link_f_constraints": [constrain_positive],
"Y": self.positive_Y,
"laplace": True
}
#"Exponential_default": {
#"model": GPy.likelihoods.exponential(),
#"link_f_constraints": [constrain_positive],
#"Y": self.positive_Y,
#"laplace": True,
#},
#"Poisson_default": {
#"model": GPy.likelihoods.poisson(),
#"link_f_constraints": [constrain_positive],
#"Y": self.integer_Y,
#"laplace": True,
#"ep": False #Should work though...
#},
#"Gamma_default": {
#"model": GPy.likelihoods.gamma(),
#"link_f_constraints": [constrain_positive],
#"Y": self.positive_Y,
#"laplace": True
#}
}
for name, attributes in noise_models.iteritems():
@ -286,8 +309,8 @@ class TestNoiseModels(object):
else:
ep = False
if len(param_vals) > 1:
raise NotImplementedError("Cannot support multiple params in likelihood yet!")
#if len(param_vals) > 1:
#raise NotImplementedError("Cannot support multiple params in likelihood yet!")
#Required by all
#Normal derivatives
@ -302,13 +325,13 @@ class TestNoiseModels(object):
yield self.t_d3logpdf_df3, model, Y, f
yield self.t_d3logpdf_dlink3, model, Y, f, link_f_constraints
#Params
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_constraints
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_constraints
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_constraints
yield self.t_dlogpdf_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_dlogpdf_df_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_df2_dparams, model, Y, f, param_vals, param_names, param_constraints
#Link params
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_constraints
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_constraints
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_constraints
yield self.t_dlogpdf_link_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_dlogpdf_dlink_dparams, model, Y, f, param_vals, param_names, param_constraints
yield self.t_d2logpdf2_dlink2_dparams, model, Y, f, param_vals, param_names, param_constraints
#laplace likelihood gradcheck
yield self.t_laplace_fit_rbf_white, model, self.X, Y, f, self.step, param_vals, param_names, param_constraints
@ -370,33 +393,33 @@ class TestNoiseModels(object):
# df_dparams #
##############
@with_setup(setUp, tearDown)
def t_dlogpdf_dparams(self, model, Y, f, params, param_constraints):
def t_dlogpdf_dparams(self, model, Y, f, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.logpdf, model.dlogpdf_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=True, verbose=True)
params, params_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_dlogpdf_df_dparams(self, model, Y, f, params, param_constraints):
def t_dlogpdf_df_dparams(self, model, Y, f, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.dlogpdf_df, model.dlogpdf_df_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=True, verbose=True)
params, params_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, param_constraints):
def t_d2logpdf2_df2_dparams(self, model, Y, f, params, params_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.d2logpdf_df2, model.d2logpdf_df2_dtheta,
params, args=(f, Y), constraints=param_constraints,
randomize=True, verbose=True)
params, params_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
################
@ -454,32 +477,32 @@ class TestNoiseModels(object):
# dlink_dparams #
#################
@with_setup(setUp, tearDown)
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_constraints):
def t_dlogpdf_link_dparams(self, model, Y, f, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.logpdf_link, model.dlogpdf_link_dtheta,
params, args=(f, Y), constraints=param_constraints,
params, param_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_constraints):
def t_dlogpdf_dlink_dparams(self, model, Y, f, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.dlogpdf_dlink, model.dlogpdf_dlink_dtheta,
params, args=(f, Y), constraints=param_constraints,
params, param_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@with_setup(setUp, tearDown)
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_constraints):
def t_d2logpdf2_dlink2_dparams(self, model, Y, f, params, param_names, param_constraints):
print "\n{}".format(inspect.stack()[0][3])
print model
assert (
dparam_checkgrad(model.d2logpdf_dlink2, model.d2logpdf_dlink2_dtheta,
params, args=(f, Y), constraints=param_constraints,
params, param_names, args=(f, Y), constraints=param_constraints,
randomize=False, verbose=True)
)
@ -493,18 +516,23 @@ class TestNoiseModels(object):
Y = Y/Y.max()
white_var = 1e-6
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), model)
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=laplace_likelihood)
laplace_likelihood = GPy.inference.latent_function_inference.LaplaceInference()
m = GPy.core.GP(X.copy(), Y.copy(), kernel, likelihood=model, inference_method=laplace_likelihood)
m.ensure_default_constraints()
m.constrain_fixed('white', white_var)
m['white'].constrain_fixed(white_var)
for param_num in range(len(param_names)):
name = param_names[param_num]
m[name] = param_vals[param_num]
constraints[param_num](name, m)
#Set constraints
for constrain_param, constraint in constraints:
constraint(constrain_param, m)
print m
m.randomize()
#Set params
for param_num in range(len(param_names)):
name = param_names[param_num]
m[name] = param_vals[param_num]
#m.optimize(max_iters=8)
print m
m.checkgrad(verbose=1, step=step)
@ -525,10 +553,10 @@ class TestNoiseModels(object):
Y = Y/Y.max()
white_var = 1e-6
kernel = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
ep_likelihood = GPy.likelihoods.EP(Y.copy(), model)
m = GPy.models.GPRegression(X.copy(), Y.copy(), kernel, likelihood=ep_likelihood)
ep_inf = GPy.inference.latent_function_inference.EP()
m = GPy.core.GP(X.copy(), Y.copy(), kernel=kernel, likelihood=model, inference_method=ep_inf)
m.ensure_default_constraints()
m.constrain_fixed('white', white_var)
m['white'].constrain_fixed(white_var)
for param_num in range(len(param_names)):
name = param_names[param_num]
@ -559,8 +587,8 @@ class LaplaceTests(unittest.TestCase):
self.var = 0.2
self.var = np.random.rand(1)
self.stu_t = GPy.likelihoods.student_t(deg_free=5, sigma2=self.var)
self.gauss = GPy.likelihoods.gaussian(gp_transformations.Log(), variance=self.var, D=self.D, N=self.N)
self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var)
self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
#Make a bigger step as lower bound can be quite curved
self.step = 1e-6
@ -584,7 +612,7 @@ class LaplaceTests(unittest.TestCase):
noise = np.random.randn(*self.X.shape)*self.real_std
self.Y = np.sin(self.X*2*np.pi) + noise
self.f = np.random.rand(self.N, 1)
self.gauss = GPy.likelihoods.gaussian(variance=self.var, D=self.D, N=self.N)
self.gauss = GPy.likelihoods.Gaussian(variance=self.var)
dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)
@ -605,23 +633,27 @@ class LaplaceTests(unittest.TestCase):
#Yc = Y.copy()
#Yc[75:80] += 1
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
kernel2 = kernel1.copy()
#FIXME: Make sure you can copy kernels when params is fixed
#kernel2 = kernel1.copy()
kernel2 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1])
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
m1.constrain_fixed('white', 1e-6)
m1['noise'] = initial_var_guess
m1.constrain_bounded('noise', 1e-4, 10)
m1.constrain_bounded('rbf', 1e-4, 10)
gauss_distr1 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
exact_inf = GPy.inference.latent_function_inference.ExactGaussianInference()
m1 = GPy.core.GP(X, Y.copy(), kernel=kernel1, likelihood=gauss_distr1, inference_method=exact_inf)
m1['white'].constrain_fixed(1e-6)
m1['variance'] = initial_var_guess
m1['variance'].constrain_bounded(1e-4, 10)
m1['rbf'].constrain_bounded(1e-4, 10)
m1.ensure_default_constraints()
m1.randomize()
gauss_distr = GPy.likelihoods.gaussian(variance=initial_var_guess, D=1, N=Y.shape[0])
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), gauss_distr)
m2 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel2, likelihood=laplace_likelihood)
gauss_distr2 = GPy.likelihoods.Gaussian(variance=initial_var_guess)
laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
m2 = GPy.core.GP(X, Y.copy(), kernel=kernel2, likelihood=gauss_distr2, inference_method=laplace_inf)
m2.ensure_default_constraints()
m2.constrain_fixed('white', 1e-6)
m2.constrain_bounded('rbf', 1e-4, 10)
m2.constrain_bounded('noise', 1e-4, 10)
m2['white'].constrain_fixed(1e-6)
m2['rbf'].constrain_bounded(1e-4, 10)
m2['variance'].constrain_bounded(1e-4, 10)
m2.randomize()
if debug:
@ -667,7 +699,7 @@ class LaplaceTests(unittest.TestCase):
#Check Y's are the same
np.testing.assert_almost_equal(Y, m2.likelihood.Y, decimal=5)
np.testing.assert_almost_equal(m1.Y, m2.Y, decimal=5)
#Check marginals are the same
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
#Check marginals are the same with random

View file

@ -9,42 +9,44 @@ import numpy
import GPy
import itertools
from GPy.core import Model
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.transformations import Logexp
class PsiStatModel(Model):
def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
super(PsiStatModel, self).__init__(name='psi stat test')
self.which = which
self.X = X
self.X_variance = X_variance
self.Z = Z
self.X = Param("X", X)
self.X_variance = Param('X_variance', X_variance, Logexp())
self.Z = Param("Z", Z)
self.N, self.input_dim = X.shape
self.num_inducing, input_dim = Z.shape
assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
self.kern = kernel
super(PsiStatModel, self).__init__()
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
def _get_param_names(self):
Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))]
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))]
return Xnames + Znames + self.kern._get_param_names()
def _get_params(self):
return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
def _set_params(self, x, save_old=True, save_count=0):
start, end = 0, self.X.size
self.X = x[start:end].reshape(self.N, self.input_dim)
start, end = end, end + self.X_variance.size
self.X_variance = x[start: end].reshape(self.N, self.input_dim)
start, end = end, end + self.Z.size
self.Z = x[start: end].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(x[end:])
self.add_parameters(self.X, self.X_variance, self.Z, self.kern)
def log_likelihood(self):
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
def _log_likelihood_gradients(self):
def parameters_changed(self):
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
self.X.gradient = psimu
self.X_variance.gradient = psiS
#psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
except AttributeError: psiZ = numpy.zeros_like(self.Z)
self.Z.gradient = psiZ
#psiZ = numpy.ones(self.num_inducing * self.input_dim)
thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
N,M = self.X.shape[0], self.Z.shape[0]
dL_dpsi0, dL_dpsi1, dL_dpsi2 = numpy.zeros([N]), numpy.zeros([N,M]), numpy.zeros([N,M,M])
if self.which == 'psi0': dL_dpsi0 += 1
if self.which == 'psi1': dL_dpsi1 += 1
if self.which == 'psi2': dL_dpsi2 += 1
self.kern.update_gradients_variational(numpy.zeros([1,1]),
dL_dpsi0,
dL_dpsi1,
dL_dpsi2, self.X, self.X_variance, self.Z)
class DPsiStatTest(unittest.TestCase):
input_dim = 5
@ -57,61 +59,58 @@ class DPsiStatTest(unittest.TestCase):
Y = X.dot(numpy.random.randn(input_dim, input_dim))
# kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)]
kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim),
GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)]
kernels = [
GPy.kern.linear(input_dim),
GPy.kern.rbf(input_dim),
#GPy.kern.bias(input_dim),
#GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim),
#GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)
]
def testPsi0(self):
for k in self.kernels:
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,\
num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints()
m.randomize()
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi1(self):
for k in self.kernels:
m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints()
m.randomize()
assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_lin(self):
k = self.kernels[0]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints()
m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_lin_bia(self):
k = self.kernels[3]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints()
m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_rbf(self):
k = self.kernels[1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints()
m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_rbf_bia(self):
k = self.kernels[-1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints()
m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
def testPsi2_bia(self):
k = self.kernels[2]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
num_inducing=self.num_inducing, kernel=k)
m.ensure_default_constraints()
m.randomize()
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k._parameters_)))
if __name__ == "__main__":

View file

@ -10,6 +10,7 @@ import datasets
import mocap
import decorators
import classification
import caching
try:
import sympy

40
GPy/util/caching.py Normal file
View file

@ -0,0 +1,40 @@
from ..core.parameterization.array_core import ObservableArray, ParamList
class Cacher(object):
def __init__(self, operation, limit=5):
self.limit = int(limit)
self.operation=operation
self.cached_inputs = ParamList([])
self.cached_outputs = []
self.inputs_changed = []
def __call__(self, X):
assert isinstance(X, ObservableArray)
if X in self.cached_inputs:
i = self.cached_inputs.index(X)
if self.inputs_changed[i]:
self.cached_outputs[i] = self.operation(X)
self.inputs_changed[i] = False
return self.cached_outputs[i]
else:
if len(self.cached_inputs) == self.limit:
X_ = self.cached_inputs.pop(0)
X_.remove_observer(self)
self.inputs_changed.pop(0)
self.cached_outputs.pop(0)
self.cached_inputs.append(X)
self.cached_outputs.append(self.operation(X))
self.inputs_changed.append(False)
X.add_observer(self, self.on_cache_changed)
return self.cached_outputs[-1]
def on_cache_changed(self, X):
print id(X)
i = self.cached_inputs.index(X)
self.inputs_changed[i] = True

View file

@ -41,6 +41,17 @@ else:
_blas_available = False
warnings.warn("warning: caught this exception:" + str(e))
def dtrtri(L, lower=0):
"""
Wrapper for lapack dtrtrs function
Inverse of L
:param L: Triangular Matrix L
:param lower: is matrix lower (true) or upper (false)
:returns: Li, info
"""
return lapack.dtrtri(L, lower=lower)
def dtrtrs(A, B, lower=0, trans=0, unitdiag=0):
"""
Wrapper for lapack dtrtrs function

View file

@ -184,4 +184,4 @@ from :class:ndarray)"""
assert len(param) > 0, "At least one parameter needed"
if len(param) == 1:
return param[0].view(np.ndarray)
return map(lambda x: x.view(np.ndarray), param)
return [x.view(np.ndarray) for x in param]