very weird merge conflict, including in files that I did not change

James Hensman 2014-03-18 16:46:37 +00:00
commit 601175de2d
73 changed files with 2234 additions and 1567 deletions

View file

@@ -7,10 +7,10 @@ import warnings
 from .. import kern
 from ..util.linalg import dtrtrs
 from model import Model
-from parameterization import ObservableArray
+from parameterization import ObsAr
 from .. import likelihoods
 from ..likelihoods.gaussian import Gaussian
-from ..inference.latent_function_inference import exact_gaussian_inference
+from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
 from parameterization.variational import VariationalPosterior

 class GP(Model):
@@ -27,28 +27,26 @@ class GP(Model):
     """
-    def __init__(self, X, Y, kernel, likelihood, inference_method=None, Y_metadata=None, name='gp'):
+    def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None):
         super(GP, self).__init__(name)
         assert X.ndim == 2
-        if isinstance(X, (ObservableArray, VariationalPosterior)):
+        if isinstance(X, (ObsAr, VariationalPosterior)):
             self.X = X
-        else: self.X = ObservableArray(X)
+        else: self.X = ObsAr(X)
         self.num_data, self.input_dim = self.X.shape
         assert Y.ndim == 2
-        self.Y = ObservableArray(Y)
+        self.Y = ObsAr(Y)
         assert Y.shape[0] == self.num_data
         _, self.output_dim = self.Y.shape
-        if Y_metadata is not None:
-            self.Y_metadata = ObservableArray(Y_metadata)
-        else:
-            self.Y_metadata = None
+        #TODO: check the type of this is okay?
+        self.Y_metadata = Y_metadata
         assert isinstance(kernel, kern.Kern)
-        assert self.input_dim == kernel.input_dim
+        #assert self.input_dim == kernel.input_dim
         self.kern = kernel
         assert isinstance(likelihood, likelihoods.Likelihood)
@@ -56,10 +54,10 @@ class GP(Model):
         #find a sensible inference method
         if inference_method is None:
-            if isinstance(likelihood, likelihoods.Gaussian):
+            if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
                 inference_method = exact_gaussian_inference.ExactGaussianInference()
             else:
-                inference_method = expectation_propagation
+                inference_method = expectation_propagation.EP()
             print "defaulting to ", inference_method, "for latent function inference"
         self.inference_method = inference_method
@@ -67,8 +65,9 @@ class GP(Model):
         self.add_parameter(self.likelihood)

     def parameters_changed(self):
-        self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
-        self.kern.update_gradients_full(grad_dict['dL_dK'], self.X)
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, self.Y_metadata)
+        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
+        self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)

     def log_likelihood(self):
         return self._log_marginal_likelihood
@@ -96,9 +95,12 @@ class GP(Model):
         #var = Kxx - np.sum(LiKx*LiKx, 0)
         var = Kxx - np.sum(WiKx*Kx, 0)
         var = var.reshape(-1, 1)
+        #force mu to be a column vector
+        if len(mu.shape)==1: mu = mu[:,None]
         return mu, var

-    def predict(self, Xnew, full_cov=False, **likelihood_args):
+    def predict(self, Xnew, full_cov=False, Y_metadata=None):
         """
         Predict the function(s) at the new point(s) Xnew.
@@ -122,8 +124,12 @@ class GP(Model):
         mu, var = self._raw_predict(Xnew, full_cov=full_cov)
         # now push through likelihood
-        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
-        return mean, var, _025pm, _975pm
+        mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
+        return mean, var
+
+    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
+        m, v = self._raw_predict(X, full_cov=False)
+        return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)

     def posterior_samples_f(self,X,size=10, full_cov=True):
         """
@@ -146,7 +152,7 @@ class GP(Model):
         return Ysim

-    def posterior_samples(self,X,size=10, full_cov=True,noise_model=None):
+    def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
         """
         Samples the posterior GP at the points X.
@@ -161,15 +167,7 @@ class GP(Model):
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
         Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
-        if isinstance(self.likelihood, Gaussian):
-            noise_std = np.sqrt(self.likelihood._get_params())
-            Ysim += np.random.normal(0,noise_std,Ysim.shape)
-        elif isinstance(self.likelihood, Gaussian_Mixed_Noise):
-            assert noise_model is not None, "A noise model must be specified."
-            noise_std = np.sqrt(self.likelihood._get_params()[noise_model])
-            Ysim += np.random.normal(0,noise_std,Ysim.shape)
-        else:
-            Ysim = self.likelihood.noise_model.samples(Ysim)
+        Ysim = self.likelihood.samples(Ysim, Y_metadata)
         return Ysim
@@ -185,7 +183,7 @@ class GP(Model):
         """
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import models_plots
-        models_plots.plot_fit_f(self,*args,**kwargs)
+        return models_plots.plot_fit_f(self,*args,**kwargs)

     def plot(self, *args, **kwargs):
         """
@@ -206,7 +204,7 @@ class GP(Model):
         """
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import models_plots
-        models_plots.plot_fit(self,*args,**kwargs)
+        return models_plots.plot_fit(self,*args,**kwargs)

     def _getstate(self):
         """

View file

@@ -301,9 +301,7 @@ class Model(Parameterized):
             denominator = (2 * np.dot(dx, gradient))
             global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
-            gloabl_diff = (f1 - f2) - denominator
-            return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gloabl_diff) < tolerance)
+            return np.abs(1. - global_ratio) < tolerance or np.abs(f1-f2).sum() + np.abs((2 * np.dot(dx, gradient))).sum() < tolerance
         else:
             # check the gradient of each parameter individually, and do some pretty printing
             try:
@@ -339,7 +337,7 @@ class Model(Parameterized):
                 print "No free parameters to check"
                 return
-        gradient = self.objective_function_gradients(x)
+        gradient = self.objective_function_gradients(x).copy()
         np.where(gradient == 0, 1e-312, gradient)
         ret = True
         for nind, xind in itertools.izip(param_index, transformed_index):
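
For reference, the checkgrad ratio test touched in the first hunk above compares a central finite difference against the analytic gradient and accepts when their ratio is close to 1. A standalone sketch of the same idea, assuming a hypothetical scalar objective f and its gradient function df (both stand-ins, not GPy API):

    import numpy as np

    def ratio_check(f, df, x, step=1e-6, tolerance=1e-3):
        dx = step * np.ones_like(x)
        f1, f2 = f(x + dx), f(x - dx)           # central difference
        numerical = f1 - f2                     # approximately 2 * dx . gradient
        denominator = 2 * np.dot(dx, df(x))
        ratio = numerical / np.where(denominator == 0., 1e-32, denominator)
        return np.abs(1. - ratio) < tolerance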

View file

@@ -1,5 +1,5 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from param import Param, ObservableArray
+from param import Param, ObsAr
 from parameterized import Parameterized

View file

@@ -1,29 +1,27 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-__updated__ = '2013-12-16'
+__updated__ = '2014-03-17'

 import numpy as np
 from parameter_core import Observable

-class ObservableArray(np.ndarray, Observable):
+class ObsAr(np.ndarray, Observable):
     """
     An ndarray which reports changes to its observers.
     The observers can add themselves with a callable, which
     will be called every time this array changes. The callable
     takes exactly one argument, which is this array itself.
     """
-    __array_priority__ = -1 # Never give back ObservableArray
-    def __new__(cls, input_array):
-        if not isinstance(input_array, ObservableArray):
-            obj = np.atleast_1d(input_array).view(cls)
+    __array_priority__ = -1 # Never give back ObsAr
+    def __new__(cls, input_array, *a, **kw):
+        if not isinstance(input_array, ObsAr):
+            obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
         else: obj = input_array
-        cls.__name__ = "ObservableArray\n "
+        #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
+        super(ObsAr, obj).__init__(*a, **kw)
         return obj

-    def __init__(self, *a, **kw):
-        super(ObservableArray, self).__init__(*a, **kw)
-
     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
         if obj is None: return
@@ -32,6 +30,14 @@ class ObservableArray(np.ndarray, Observable):
     def __array_wrap__(self, out_arr, context=None):
         return out_arr.view(np.ndarray)

+    def __reduce__(self):
+        func, args, state = np.ndarray.__reduce__(self)
+        return func, args, (state, Observable._getstate(self))
+
+    def __setstate__(self, state):
+        np.ndarray.__setstate__(self, state[0])
+        Observable._setstate(self, state[1])
+
     def _s_not_empty(self, s):
         # this checks whether there is something picked by this slice.
         return True
@@ -48,7 +54,7 @@ class ObservableArray(np.ndarray, Observable):
     def __setitem__(self, s, val):
         if self._s_not_empty(s):
-            super(ObservableArray, self).__setitem__(s, val)
+            super(ObsAr, self).__setitem__(s, val)
             self.notify_observers(self[s])

     def __getslice__(self, start, stop):
@@ -58,7 +64,7 @@ class ObservableArray(np.ndarray, Observable):
         return self.__setitem__(slice(start, stop), val)

     def __copy__(self, *args):
-        return ObservableArray(self.view(np.ndarray).copy())
+        return ObsAr(self.view(np.ndarray).copy())

     def copy(self, *args):
         return self.__copy__(*args)
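
The __reduce__/__setstate__ pair added above is the usual recipe for pickling numpy ndarray subclasses: extend the ndarray state tuple with the subclass's extra state and unpack both halves on load. A minimal sketch of the same pattern outside GPy (the extra attribute is a hypothetical stand-in for the observer state):

    import numpy as np

    class Tagged(np.ndarray):
        def __new__(cls, arr, extra=None):
            obj = np.asarray(arr, dtype=np.float64).view(cls)
            obj.extra = extra
            return obj

        def __array_finalize__(self, obj):
            if obj is None: return
            self.extra = getattr(obj, 'extra', None)

        def __reduce__(self):
            func, args, state = np.ndarray.__reduce__(self)
            return func, args, (state, self.extra)   # append our own state

        def __setstate__(self, state):
            np.ndarray.__setstate__(self, state[0])  # restore the ndarray part
            self.extra = state[1]                    # restore our part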

View file

@@ -25,11 +25,10 @@ class ParameterIndexOperations(object):
             self.add(t, i)

     def __getstate__(self):
-        return self._properties#, self._reverse
+        return self._properties

     def __setstate__(self, state):
-        self._properties = state[0]
-        # self._reverse = state[1]
+        self._properties = state

     def iteritems(self):
         return self._properties.iteritems()
@@ -76,10 +75,7 @@ class ParameterIndexOperations(object):
         return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)

     def add(self, prop, indices):
-        try:
-            self._properties[prop] = combine_indices(self._properties[prop], indices)
-        except KeyError:
-            self._properties[prop] = indices
+        self._properties[prop] = combine_indices(self._properties[prop], indices)

     def remove(self, prop, indices):
         if prop in self._properties:
@@ -125,13 +121,11 @@ class ParameterIndexOperationsView(object):
     def __getstate__(self):
         return [self._param_index_ops, self._offset, self._size]

     def __setstate__(self, state):
         self._param_index_ops = state[0]
         self._offset = state[1]
         self._size = state[2]

     def _filter_index(self, ind):
         return ind[(ind >= self._offset) * (ind < (self._offset + self._size))] - self._offset

View file

@@ -5,21 +5,17 @@ Created on 27 Feb 2014
 '''
 from collections import defaultdict

-class DefaultArrayDict(defaultdict):
-    def __init__(self):
+def intarray_default_factory():
+    import numpy as np
+    return np.int_([])
+
+class IntArrayDict(defaultdict):
+    def __init__(self, default_factory=None):
         """
         Default will be self._default, if not set otherwise
         """
-        defaultdict.__init__(self, self.default_factory)
+        defaultdict.__init__(self, intarray_default_factory)

-class SetDict(DefaultArrayDict):
-    def default_factory(self):
-        return set()
-
-class IntArrayDict(DefaultArrayDict):
-    def default_factory(self):
-        import numpy as np
-        return np.int_([])
-
 class ArrayList(list):
     """

View file

@@ -3,8 +3,8 @@
 import itertools
 import numpy
-from parameter_core import OptimizationHandlable, Gradcheckable, adjust_name_for_printing
-from array_core import ObservableArray
+from parameter_core import OptimizationHandlable, adjust_name_for_printing
+from array_core import ObsAr

 ###### printing
 __constraints_name__ = "Constraint"
@@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
 __print_threshold__ = 5
 ######

-class Param(OptimizationHandlable, ObservableArray):
+class Param(OptimizationHandlable, ObsAr):
     """
     Parameter object for GPy models.
@@ -43,16 +43,12 @@ class Param(OptimizationHandlable, ObservableArray):
     _fixes_ = None
     _parameters_ = []
     def __new__(cls, name, input_array, default_constraint=None):
-        obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
+        obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array, name=name, default_constraint=default_constraint))
         cls.__name__ = "Param"
         obj._current_slice_ = (slice(obj.shape[0]),)
         obj._realshape_ = obj.shape
         obj._realsize_ = obj.size
         obj._realndim_ = obj.ndim
-        obj._updated_ = False
-        from lists_and_dicts import SetDict
-        obj._tied_to_me_ = SetDict()
-        obj._tied_to_ = []
         obj._original_ = True
         obj._gradient_array_ = numpy.zeros(obj.shape, dtype=numpy.float64)
         return obj
@@ -81,14 +77,11 @@ class Param(OptimizationHandlable, ObservableArray):
         self._parent_index_ = getattr(obj, '_parent_index_', None)
         self._default_constraint_ = getattr(obj, '_default_constraint_', None)
         self._current_slice_ = getattr(obj, '_current_slice_', None)
-        self._tied_to_me_ = getattr(obj, '_tied_to_me_', None)
-        self._tied_to_ = getattr(obj, '_tied_to_', None)
         self._realshape_ = getattr(obj, '_realshape_', None)
         self._realsize_ = getattr(obj, '_realsize_', None)
         self._realndim_ = getattr(obj, '_realndim_', None)
-        self._updated_ = getattr(obj, '_updated_', None)
         self._original_ = getattr(obj, '_original_', None)
-        self._name = getattr(obj, 'name', None)
+        self._name = getattr(obj, '_name', None)
         self._gradient_array_ = getattr(obj, '_gradient_array_', None)
         self.constraints = getattr(obj, 'constraints', None)
         self.priors = getattr(obj, 'priors', None)
@@ -108,10 +101,10 @@ class Param(OptimizationHandlable, ObservableArray):
     #===========================================================================
     # Pickling operations
     #===========================================================================
-    def __reduce_ex__(self):
+    def __reduce__(self):
         func, args, state = super(Param, self).__reduce__()
         return func, args, (state,
-                            (self.name,
+                            (self._name,
                              self._parent_,
                              self._parent_index_,
                              self._default_constraint_,
@@ -119,18 +112,16 @@ class Param(OptimizationHandlable, ObservableArray):
                              self._realshape_,
                              self._realsize_,
                              self._realndim_,
-                             self._tied_to_me_,
-                             self._tied_to_,
-                             self._updated_,
+                             self.constraints,
+                             self.priors
                              )
                             )

     def __setstate__(self, state):
         super(Param, self).__setstate__(state[0])
         state = list(state[1])
-        self._updated_ = state.pop()
-        self._tied_to_ = state.pop()
-        self._tied_to_me_ = state.pop()
+        self.priors = state.pop()
+        self.constraints = state.pop()
         self._realndim_ = state.pop()
         self._realsize_ = state.pop()
         self._realshape_ = state.pop()
@@ -138,7 +129,7 @@ class Param(OptimizationHandlable, ObservableArray):
         self._default_constraint_ = state.pop()
         self._parent_index_ = state.pop()
         self._parent_ = state.pop()
-        self.name = state.pop()
+        self._name = state.pop()

     def copy(self, *args):
         constr = self.constraints.copy()
@@ -184,21 +175,21 @@ class Param(OptimizationHandlable, ObservableArray):
     #===========================================================================
     # Index Operations:
     #===========================================================================
-    def _internal_offset(self):
-        internal_offset = 0
-        extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
-        for i, si in enumerate(self._current_slice_[:self._realndim_]):
-            if numpy.all(si == Ellipsis):
-                continue
-            if isinstance(si, slice):
-                a = si.indices(self._realshape_[i])[0]
-            elif isinstance(si, (list,numpy.ndarray,tuple)):
-                a = si[0]
-            else: a = si
-            if a < 0:
-                a = self._realshape_[i] + a
-            internal_offset += a * extended_realshape[i]
-        return internal_offset
+    #def _internal_offset(self):
+    #    internal_offset = 0
+    #    extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
+    #    for i, si in enumerate(self._current_slice_[:self._realndim_]):
+    #        if numpy.all(si == Ellipsis):
+    #            continue
+    #        if isinstance(si, slice):
+    #            a = si.indices(self._realshape_[i])[0]
+    #        elif isinstance(si, (list,numpy.ndarray,tuple)):
+    #            a = si[0]
+    #        else: a = si
+    #        if a < 0:
+    #            a = self._realshape_[i] + a
+    #        internal_offset += a * extended_realshape[i]
+    #    return internal_offset

     def _raveled_index(self, slice_index=None):
         # return an index array on the raveled array, which is formed by the current_slice
@@ -208,6 +199,9 @@ class Param(OptimizationHandlable, ObservableArray):
         if ind.ndim < 2: ind = ind[:, None]
         return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)

+    def _raveled_index_for(self, obj):
+        return self._raveled_index()
+
     def _expand_index(self, slice_index=None):
         # this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
         # it basically translates slices to their respective index arrays and turns negative indices around
@@ -228,6 +222,11 @@ class Param(OptimizationHandlable, ObservableArray):
                 return numpy.r_[a]
             return numpy.r_[:b]
         return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))

+    #===========================================================================
+    # Constrainable
+    #===========================================================================
+    def _ensure_fixes(self):
+        if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)
+
     #===========================================================================
     # Convenience
@@ -243,7 +242,6 @@ class Param(OptimizationHandlable, ObservableArray):
     #round.__doc__ = numpy.round.__doc__
     def _get_original(self, param):
         return self

     #===========================================================================
     # Printing -> done
     #===========================================================================
@@ -270,23 +268,13 @@ class Param(OptimizationHandlable, ObservableArray):
         return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))]

     @property
     def _ties_str(self):
-        return [t._short() for t in self._tied_to_] or ['']
+        return ['']
+
+    def _ties_for(self, ravi):
+        return [['N/A']]*ravi.size

     def __repr__(self, *args, **kwargs):
         name = "\033[1m{x:s}\033[0;0m:\n".format(
                             x=self.hierarchy_name())
         return name + super(Param, self).__repr__(*args, **kwargs)

-    def _ties_for(self, rav_index):
-        # size = sum(p.size for p in self._tied_to_)
-        ties = numpy.empty(shape=(len(self._tied_to_), numpy.size(rav_index)), dtype=Param)
-        for i, tied_to in enumerate(self._tied_to_):
-            for t, ind in tied_to._tied_to_me_.iteritems():
-                if t._parent_index_ == self._parent_index_:
-                    matches = numpy.where(rav_index[:, None] == t._raveled_index()[None, :])
-                    tt_rav_index = tied_to._raveled_index()
-                    ind_rav_matches = numpy.where(tt_rav_index == numpy.array(list(ind)))[0]
-                    if len(ind) != 1: ties[i, matches[0][ind_rav_matches]] = numpy.take(tt_rav_index, matches[1], mode='wrap')[ind_rav_matches]
-                    else: ties[i, matches[0]] = numpy.take(tt_rav_index, matches[1], mode='wrap')
-        return map(lambda a: sum(a, []), zip(*[[[tie.flatten()] if tx != None else [] for tx in t] for t, tie in zip(ties, self._tied_to_)]))
-
     def _indices(self, slice_index=None):
         # get a int-array containing all indices in the first axis.
         if slice_index is None:
@@ -327,7 +315,7 @@ class Param(OptimizationHandlable, ObservableArray):
         if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
         if prirs is None: prirs = self.priors.properties_for(ravi)
         if ties is None: ties = self._ties_for(ravi)
-        ties = [' '.join(map(lambda x: x._short(), t)) for t in ties]
+        ties = [' '.join(map(lambda x: x, t)) for t in ties]
         if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__)
         if lx is None: lx = self._max_len_values()
         if li is None: li = self._max_len_index(indices)

View file

@@ -15,9 +15,8 @@ Observable Pattern for patameterization
 from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np
-import itertools

-__updated__ = '2013-12-16'
+__updated__ = '2014-03-18'

 class HierarchyError(Exception):
     """
@@ -32,7 +31,71 @@ def adjust_name_for_printing(name):
         return name.replace(" ", "_").replace(".", "_").replace("-", "_m_").replace("+", "_p_").replace("!", "_I_").replace("**", "_xx_").replace("*", "_x_").replace("/", "_l_").replace("@",'_at_')
     return ''

-class Observable(object):
+class InterfacePickleFunctions(object):
+    def __init__(self, *a, **kw):
+        super(InterfacePickleFunctions, self).__init__()
+
+    def _getstate(self):
+        """
+        Returns the state of this class in a memento pattern.
+        The state must be a list-like structure of all the fields
+        this class needs to run.
+        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
+        """
+        raise NotImplementedError, "To be able to use pickling you need to implement this method"
+
+    def _setstate(self, state):
+        """
+        Set the state (memento pattern) of this class to the given state.
+        Usually this is just the counterpart to _getstate, such that
+        an object is a copy of another when calling
+            copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())
+        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
+        """
+        raise NotImplementedError, "To be able to use pickling you need to implement this method"
+
+class Pickleable(InterfacePickleFunctions):
+    """
+    Make an object pickleable (See python doc 'pickling').
+
+    This class allows for pickling support by Memento pattern.
+    _getstate returns a memento of the class, which gets pickled.
+    _setstate(<memento>) (re-)sets the state of the class to the memento
+    """
+    def __init__(self, *a, **kw):
+        super(Pickleable, self).__init__()
+    #===========================================================================
+    # Pickling operations
+    #===========================================================================
+    def pickle(self, f, protocol=-1):
+        """
+        :param f: either filename or open file object to write to.
+                  if it is an open buffer, you have to make sure to close
+                  it properly.
+        :param protocol: pickling protocol to use, python-pickle for details.
+        """
+        import cPickle
+        if isinstance(f, str):
+            with open(f, 'w') as f:
+                cPickle.dump(self, f, protocol)
+        else:
+            cPickle.dump(self, f, protocol)
+
+    def __getstate__(self):
+        if self._has_get_set_state():
+            return self._getstate()
+        return self.__dict__
+
+    def __setstate__(self, state):
+        if self._has_get_set_state():
+            self._setstate(state)
+            # TODO: maybe parameters_changed() here?
+            return
+        self.__dict__ = state
+
+    def _has_get_set_state(self):
+        return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
+
+class Observable(Pickleable):
     """
     Observable pattern for parameterization.
@@ -42,9 +105,8 @@ class Observable(object):
     """
     _updated = True
     def __init__(self, *args, **kwargs):
+        super(Observable, self).__init__(*args, **kwargs)
         self._observer_callables_ = []

-    def __del__(self, *args, **kwargs):
-        del self._observer_callables_
-
     def add_observer(self, observer, callble, priority=0):
         self._insert_sorted(priority, observer, callble)
@@ -91,68 +153,17 @@
             ins += 1
         self._observer_callables_.insert(ins, (p, o, c))

-class Pickleable(object):
-    """
-    Make an object pickleable (See python doc 'pickling').
-
-    This class allows for pickling support by Memento pattern.
-    _getstate returns a memento of the class, which gets pickled.
-    _setstate(<memento>) (re-)sets the state of the class to the memento
-    """
-    #===========================================================================
-    # Pickling operations
-    #===========================================================================
-    def pickle(self, f, protocol=-1):
-        """
-        :param f: either filename or open file object to write to.
-                  if it is an open buffer, you have to make sure to close
-                  it properly.
-        :param protocol: pickling protocol to use, python-pickle for details.
-        """
-        import cPickle
-        if isinstance(f, str):
-            with open(f, 'w') as f:
-                cPickle.dump(self, f, protocol)
-        else:
-            cPickle.dump(self, f, protocol)
-
-    def __getstate__(self):
-        if self._has_get_set_state():
-            return self._getstate()
-        return self.__dict__
-
-    def __setstate__(self, state):
-        if self._has_get_set_state():
-            self._setstate(state)
-            # TODO: maybe parameters_changed() here?
-            return
-        self.__dict__ = state
-
-    def _has_get_set_state(self):
-        return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
-
     def _getstate(self):
-        """
-        Returns the state of this class in a memento pattern.
-        The state must be a list-like structure of all the fields
-        this class needs to run.
-        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
-        """
-        raise NotImplementedError, "To be able to use pickling you need to implement this method"
+        return [self._observer_callables_]

     def _setstate(self, state):
-        """
-        Set the state (memento pattern) of this class to the given state.
-        Usually this is just the counterpart to _getstate, such that
-        an object is a copy of another when calling
-            copy = <classname>.__new__(*args,**kw)._setstate(<to_be_copied>._getstate())
-        See python doc "pickling" (`__getstate__` and `__setstate__`) for details.
-        """
-        raise NotImplementedError, "To be able to use pickling you need to implement this method"
+        self._observer_callables_ = state.pop()

 #===============================================================================
 # Foundation framework for parameterized and param objects:
 #===============================================================================
-class Parentable(object):
+class Parentable(Observable):
     """
     Enable an Object to have a parent.
@@ -161,6 +172,8 @@ class Parentable(object):
     """
     _parent_ = None
     _parent_index_ = None
+    def __init__(self, *args, **kwargs):
+        super(Parentable, self).__init__(*args, **kwargs)

     def has_parent(self):
         """
@@ -205,6 +218,7 @@ class Gradcheckable(Parentable):
     """
     def __init__(self, *a, **kw):
         super(Gradcheckable, self).__init__(*a, **kw)

     def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of this parameter with respect to the highest parent's
@@ -221,12 +235,13 @@
         if self.has_parent():
             return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
         return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)

-    def _checkgrad(self, param):
+    def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3):
         """
         Perform the checkgrad on the model.
         TODO: this can be done more efficiently, when doing it inside here
         """
-        raise NotImplementedError, "Need log likelihood to check gradient against"
+        raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!"

 class Nameable(Gradcheckable):
@@ -272,6 +287,9 @@ class Indexable(object):
     Enable enraveled indexes and offsets for this object.
     The raveled index of an object is the index for its parameters in a flattened int array.
     """
+    def __init__(self, *a, **kw):
+        super(Indexable, self).__init__()
+
     def _raveled_index(self):
         """
         Flattened array of ints, specifying the index of this object.
@@ -279,13 +297,6 @@
         """
         raise NotImplementedError, "Need to be able to get the raveled Index"

-    def _internal_offset(self):
-        """
-        The offset for this parameter inside its parent.
-        This has to account for shaped parameters!
-        """
-        return 0
-
     def _offset_for(self, param):
         """
         Return the offset of the param inside this parameterized object.
@@ -347,9 +358,11 @@ class Constrainable(Nameable, Indexable):
         """
         if value is not None:
             self[:] = value
-        self.constrain(__fixed__, warning=warning, trigger_parent=trigger_parent)
+        reconstrained = self.unconstrain()
+        self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning)
         rav_i = self._highest_parent_._raveled_index_for(self)
         self._highest_parent_._set_fixed(rav_i)
+        self.notify_observers(self, None if trigger_parent else -np.inf)
     fix = constrain_fixed

     def unconstrain_fixed(self):
@@ -360,27 +373,32 @@
         self._highest_parent_._set_unfixed(unconstrained)
     unfix = unconstrain_fixed

-    def _set_fixed(self, index):
+    def _ensure_fixes(self):
+        # Ensure that the fixes array is set:
+        # Parameterized: ones(self.size)
+        # Param: ones(self._realsize_
         if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
+
+    def _set_fixed(self, index):
+        self._ensure_fixes()
         self._fixes_[index] = FIXED
         if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED

     def _set_unfixed(self, index):
-        if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
-        # rav_i = self._raveled_index_for(param)[index]
+        self._ensure_fixes()
         self._fixes_[index] = UNFIXED
         if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED

     def _connect_fixes(self):
         fixed_indices = self.constraints[__fixed__]
         if fixed_indices.size > 0:
-            self._fixes_ = np.ones(self.size, dtype=bool) * UNFIXED
+            self._ensure_fixes()
             self._fixes_[fixed_indices] = FIXED
         else:
             self._fixes_ = None

     def _has_fixes(self):
-        return hasattr(self, "_fixes_") and self._fixes_ is not None
+        return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size

     #===========================================================================
     # Prior Operations
@@ -394,6 +412,15 @@ class Constrainable(Nameable, Indexable):
         repriorized = self.unset_priors()
         self._add_to_index_operations(self.priors, repriorized, prior, warning)

+        from domains import _REAL, _POSITIVE, _NEGATIVE
+        if prior.domain is _POSITIVE:
+            self.constrain_positive(warning)
+        elif prior.domain is _NEGATIVE:
+            self.constrain_negative(warning)
+        elif prior.domain is _REAL:
+            rav_i = self._raveled_index()
+            assert all(all(c.domain is _REAL for c in con) for con in self.constraints.properties_for(rav_i))
+
     def unset_priors(self, *priors):
         """
         Un-set all priors given from this parameter handle.
@@ -404,14 +431,14 @@ class Constrainable(Nameable, Indexable):
     def log_prior(self):
         """evaluate the prior"""
         if self.priors.size > 0:
-            x = self._get_params()
-            return reduce(lambda a, b: a + b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0)
+            x = self._param_array_
+            return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
         return 0.

     def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
         if self.priors.size > 0:
-            x = self._get_params()
+            x = self._param_array_
             ret = np.zeros(x.size)
             [np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
             return ret
@@ -430,10 +457,10 @@ class Constrainable(Nameable, Indexable):
         Constrain the parameter to the given
         :py:class:`GPy.core.transformations.Transformation`.
         """
-        if isinstance(transform, Transformation):
-            self._param_array_[:] = transform.initialize(self._param_array_)
+        self._param_array_[:] = transform.initialize(self._param_array_)
         reconstrained = self.unconstrain()
         self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
+        self.notify_observers(self, None if trigger_parent else -np.inf)

     def unconstrain(self, *transforms):
         """
@@ -530,12 +557,15 @@ class Constrainable(Nameable, Indexable):
         return removed

-class OptimizationHandlable(Constrainable, Observable):
+class OptimizationHandlable(Constrainable):
     """
     This enables optimization handles on an Object as done in GPy 0.4.

-    transformed: make sure the transformations and constraints etc are handled
+    `..._transformed`: make sure the transformations and constraints etc are handled
     """
+    def __init__(self, name, default_constraint=None, *a, **kw):
+        super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
+
     def transform(self):
         [np.put(self._param_array_, ind, c.finv(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
@@ -546,23 +576,29 @@ class OptimizationHandlable(Constrainable, Observable):
         # transformed parameters (apply transformation rules)
         p = self._param_array_.copy()
         [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-        if self._has_fixes():
+        if self.has_parent() and self.constraints[__fixed__].size != 0:
+            fixes = np.ones(self.size).astype(bool)
+            fixes[self.constraints[__fixed__]] = FIXED
+            return p[fixes]
+        elif self._has_fixes():
             return p[self._fixes_]
         return p

     def _set_params_transformed(self, p):
         if p is self._param_array_:
             p = p.copy()
-        if self._has_fixes(): self._param_array_[self._fixes_] = p
+        if self.has_parent() and self.constraints[__fixed__].size != 0:
+            fixes = np.ones(self.size).astype(bool)
+            fixes[self.constraints[__fixed__]] = FIXED
+            self._param_array_[fixes] = p
+        elif self._has_fixes(): self._param_array_[self._fixes_] = p
         else: self._param_array_[:] = p
         self.untransform()
         self._trigger_params_changed()

     def _trigger_params_changed(self, trigger_parent=True):
         [p._trigger_params_changed(trigger_parent=False) for p in self._parameters_]
-        if trigger_parent: min_priority = None
-        else: min_priority = -np.inf
-        self.notify_observers(None, min_priority)
+        self.notify_observers(None, None if trigger_parent else -np.inf)

     def _size_transformed(self):
         return self.size - self.constraints[__fixed__].size
@@ -625,6 +661,24 @@ class OptimizationHandlable(Constrainable, Observable):
         [np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
         self._set_params_transformed(x) # makes sure all of the tied parameters get the same init (since there's only one prior object...)

+    #===========================================================================
+    # For shared memory arrays. This does nothing in Param, but sets the memory
+    # for all parameterized objects
+    #===========================================================================
+    def _propagate_param_grad(self, parray, garray):
+        pi_old_size = 0
+        for pi in self._parameters_:
+            pislice = slice(pi_old_size, pi_old_size+pi.size)
+
+            self._param_array_[pislice] = pi._param_array_.ravel()#, requirements=['C', 'W']).flat
+            self._gradient_array_[pislice] = pi._gradient_array_.ravel()#, requirements=['C', 'W']).flat
+
+            pi._param_array_.data = parray[pislice].data
+            pi._gradient_array_.data = garray[pislice].data
+
+            pi._propagate_param_grad(parray[pislice], garray[pislice])
+            pi_old_size += pi.size
+
 class Parameterizable(OptimizationHandlable):
     def __init__(self, *args, **kwargs):
         super(Parameterizable, self).__init__(*args, **kwargs)
@@ -668,6 +722,10 @@ class Parameterizable(OptimizationHandlable):
         elif pname not in dir(self):
             self.__dict__[pname] = param
             self._added_names_.add(pname)
+        else:
+            print "WARNING: added a parameter with formatted name {}, which is already a member of {} object. Trying to change the parameter name to\n   {}".format(pname, self.__class__, param.name+"_")
+            param.name += "_"
+            self._add_parameter_name(param, ignore_added_names)

     def _remove_parameter_name(self, param=None, pname=None):
         assert param is None or pname is None, "can only delete either param by name, or the name of a param"
@@ -720,11 +778,11 @@
         Add all parameters to this param class, you can insert parameters
         at any given index using the :func:`list.insert` syntax
         """
-        # if param.has_parent():
-        #     raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short())
         if param in self._parameters_ and index is not None:
             self.remove_parameter(param)
             self.add_parameter(param, index)
+        elif param.has_parent():
+            raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
         elif param not in self._parameters_:
             if param.has_parent():
                 parent = param._parent_
@@ -748,13 +806,19 @@
             param.add_observer(self, self._pass_through_notify_observers, -np.inf)
-            self.size += param.size
-            self._connect_parameters(ignore_added_names=_ignore_added_names)
-            self._notify_parent_change()
-            self._connect_fixes()
+            parent = self
+            while parent is not None:
+                parent.size += param.size
+                parent = parent._parent_
+            self._connect_parameters()
+            self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
+            self._highest_parent_._notify_parent_change()
+            self._highest_parent_._connect_fixes()
         else:
-            raise RuntimeError, """Parameter exists already added and no copy made"""
+            raise HierarchyError, """Parameter exists already and no copy made"""
@@ -780,17 +844,18 @@
         param.remove_observer(self, self._pass_through_notify_observers)
         self.constraints.shift_left(start, param.size)

-        self._connect_fixes()
         self._connect_parameters()
         self._notify_parent_change()

         parent = self._parent_
         while parent is not None:
-            parent._connect_fixes()
-            parent._connect_parameters()
-            parent._notify_parent_change()
+            parent.size -= param.size
             parent = parent._parent_

+        self._highest_parent_._connect_parameters()
+        self._highest_parent_._connect_fixes()
+        self._highest_parent_._notify_parent_change()
+
     def _connect_parameters(self, ignore_added_names=False):
         # connect parameterlist to this parameterized object
         # This just sets up the right connection for the params objects
@@ -805,28 +870,24 @@
         self._gradient_array_ = np.empty(self.size, dtype=np.float64)
         self._param_slices_ = []
         for i, p in enumerate(self._parameters_):
             p._parent_ = self
             p._parent_index_ = i
             pslice = slice(old_size, old_size+p.size)
-            pi_old_size = old_size
-            for pi in p.flattened_parameters:
-                pislice = slice(pi_old_size, pi_old_size+pi.size)
-                self._param_array_[pislice] = pi._param_array_.flat
-                self._gradient_array_[pislice] = pi._gradient_array_.flat
-                pi._param_array_.data = self._param_array_[pislice].data
-                pi._gradient_array_.data = self._gradient_array_[pislice].data
-                pi_old_size += pi.size
-            if not p._param_array_.flags['C_CONTIGUOUS']:
-                import ipdb;ipdb.set_trace()
+            # first connect all children
+            p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
+            # then connect children to self
+            self._param_array_[pslice] = p._param_array_.ravel()#, requirements=['C', 'W']).ravel(order='C')
+            self._gradient_array_[pslice] = p._gradient_array_.ravel()#, requirements=['C', 'W']).ravel(order='C')
             p._param_array_.data = self._param_array_[pslice].data
             p._gradient_array_.data = self._gradient_array_[pslice].data
             self._param_slices_.append(pslice)
             self._add_parameter_name(p, ignore_added_names=ignore_added_names)
             old_size += p.size
@@ -843,6 +904,7 @@
     #===========================================================================
     def copy(self):
         """Returns a (deep) copy of the current model"""
+        raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
         import copy
         from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
         from .lists_and_dicts import ArrayList
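
The _propagate_param_grad addition above wires each child parameter's arrays onto slices of the parent's flat memory by reassigning the ndarray .data buffer, so writes to a Param land directly in the model-level array. A minimal sketch of that buffer-sharing trick in plain numpy, done the same way the hunk does it (note it relies on numpy permitting assignment to .data, which newer numpy versions deprecate):

    import numpy as np

    flat = np.zeros(4)           # parent-level flat parameter array
    child = np.array([1., 2.])   # child parameter values

    flat[0:2] = child            # copy current values into the parent slice
    child.data = flat[0:2].data  # rebind child onto the parent's buffer

    flat[0] = 42.
    print child[0]               # 42.0 -- both names now share one block of memory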

View file

@@ -65,8 +65,8 @@ class Parameterized(Parameterizable, Pickleable):
     # **Never** call parameters_changed() yourself
     __metaclass__ = ParametersChangedMeta
     #===========================================================================
-    def __init__(self, name=None, *a, **kw):
-        super(Parameterized, self).__init__(name=name, parent=None, parent_index=None, *a, **kw)
+    def __init__(self, name=None, parameters=[], *a, **kw):
+        super(Parameterized, self).__init__(name=name, *a, **kw)
         self._in_init_ = True
         self._parameters_ = ArrayList()
         self.size = sum(p.size for p in self._parameters_)
@@ -76,6 +76,7 @@ class Parameterized(Parameterizable, Pickleable):
         self._param_slices_ = []
         self._connect_parameters()
         del self._in_init_
+        self.add_parameters(*parameters)

     def build_pydot(self, G=None):
         import pydot # @UnresolvedImport
@@ -100,7 +101,6 @@ class Parameterized(Parameterizable, Pickleable):
             return G
         return node

     def _getstate(self):
         """
         Get the current state of the class,
@@ -205,20 +205,24 @@ class Parameterized(Parameterizable, Pickleable):
         return found_params

     def __getitem__(self, name, paramlist=None):
-        if paramlist is None:
-            paramlist = self.grep_param_names(name)
-        if len(paramlist) < 1: raise AttributeError, name
-        if len(paramlist) == 1:
-            if isinstance(paramlist[-1], Parameterized):
-                paramlist = paramlist[-1].flattened_parameters
-                if len(paramlist) != 1:
-                    return ParamConcatenation(paramlist)
-            return paramlist[-1]
-        return ParamConcatenation(paramlist)
+        if isinstance(name, (int, slice, tuple, np.ndarray)):
+            return self._param_array_[name]
+        else:
+            if paramlist is None:
+                paramlist = self.grep_param_names(name)
+            if len(paramlist) < 1: raise AttributeError, name
+            if len(paramlist) == 1:
+                if isinstance(paramlist[-1], Parameterized):
+                    paramlist = paramlist[-1].flattened_parameters
+                    if len(paramlist) != 1:
+                        return ParamConcatenation(paramlist)
+                return paramlist[-1]
+            return ParamConcatenation(paramlist)

     def __setitem__(self, name, value, paramlist=None):
         if isinstance(name, (slice, tuple, np.ndarray)):
             self._param_array_[name] = value
+            self.notify_observers()
         else:
             try: param = self.__getitem__(name, paramlist)
             except AttributeError as a: raise a
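
With the __getitem__/__setitem__ changes above, a Parameterized object answers both regex-style name lookup and plain numpy-style indexing into its flat parameter array. A hedged usage sketch, assuming a model m with a parameter named 'lengthscale' (the name is a placeholder):

    m['lengthscale']   # string: regex match on parameter names -> Param or ParamConcatenation
    m[0]               # int/slice/tuple/ndarray: entries of m._param_array_
    m[:2] = 1.         # slice assignment writes the flat array and notifies observers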

View file

@@ -63,12 +63,13 @@ class SpikeAndSlabPrior(VariationalPrior):

 class VariationalPosterior(Parameterized):
-    def __init__(self, means=None, variances=None, name=None, **kw):
-        super(VariationalPosterior, self).__init__(name=name, **kw)
+    def __init__(self, means=None, variances=None, name=None, *a, **kw):
+        super(VariationalPosterior, self).__init__(name=name, *a, **kw)
         self.mean = Param("mean", means)
+        self.variance = Param("variance", variances, Logexp())
         self.ndim = self.mean.ndim
         self.shape = self.mean.shape
-        self.variance = Param("variance", variances, Logexp())
+        self.num_data, self.input_dim = self.mean.shape
         self.add_parameters(self.mean, self.variance)
         self.num_data, self.input_dim = self.mean.shape
         if self.has_uncertain_inputs():
@@ -77,6 +78,24 @@ class VariationalPosterior(Parameterized):
     def has_uncertain_inputs(self):
         return not self.variance is None

+    def __getitem__(self, s):
+        if isinstance(s, (int, slice, tuple, list, np.ndarray)):
+            import copy
+            n = self.__new__(self.__class__, self.name)
+            dc = self.__dict__.copy()
+            dc['mean'] = self.mean[s]
+            dc['variance'] = self.variance[s]
+            dc['_parameters_'] = copy.copy(self._parameters_)
+            n.__dict__.update(dc)
+            n._parameters_[dc['mean']._parent_index_] = dc['mean']
+            n._parameters_[dc['variance']._parent_index_] = dc['variance']
+            n.ndim = n.mean.ndim
+            n.shape = n.mean.shape
+            n.num_data = n.mean.shape[0]
+            n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
+            return n
+        else:
+            return super(VariationalPrior, self).__getitem__(s)

 class NormalPosterior(VariationalPosterior):
     '''
@ -108,6 +127,27 @@ class SpikeAndSlabPosterior(VariationalPosterior):
self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10)) self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10))
self.add_parameter(self.gamma) self.add_parameter(self.gamma)
def __getitem__(self, s):
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
import copy
n = self.__new__(self.__class__, self.name)
dc = self.__dict__.copy()
dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s]
dc['binary_prob'] = self.binary_prob[s]
dc['_parameters_'] = copy.copy(self._parameters_)
n.__dict__.update(dc)
n._parameters_[dc['mean']._parent_index_] = dc['mean']
n._parameters_[dc['variance']._parent_index_] = dc['variance']
n._parameters_[dc['binary_prob']._parent_index_] = dc['binary_prob']
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(SpikeAndSlabPosterior, self).__getitem__(s)
def plot(self, *args): def plot(self, *args):
""" """
Plot latent space X in 1D: Plot latent space X in 1D:
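Both __getitem__ overloads above work the same way: shallow-copy the object, re-point the sliced Param objects inside _parameters_, and recompute the shape attributes. A rough usage sketch (assumed public API; name and shapes are illustrative only):

import numpy as np
from GPy.core.parameterization.variational import NormalPosterior

q = NormalPosterior(means=np.random.randn(10, 2),
                    variances=np.ones((10, 2)), name='latent space')
q_sub = q[:5]                 # new posterior over the first five data points
print q_sub.num_data, q_sub.input_dim   # 5 2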
View file
@ -31,7 +31,7 @@ class SparseGP(GP):
""" """
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp'): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None):
#pick a sensible inference method #pick a sensible inference method
if inference_method is None: if inference_method is None:
@ -45,7 +45,7 @@ class SparseGP(GP):
self.Z = Param('inducing inputs', Z) self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0] self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name) GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata)
self.add_parameter(self.Z, index=0) self.add_parameter(self.Z, index=0)
@ -53,19 +53,19 @@ class SparseGP(GP):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood')) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
if isinstance(self.X, VariationalPosterior): if isinstance(self.X, VariationalPosterior):
#gradients wrt kernel #gradients wrt kernel
dL_dKmm = self.grad_dict.pop('dL_dKmm') dL_dKmm = self.grad_dict.pop('dL_dKmm')
self.kern.update_gradients_full(dL_dKmm, self.Z, None) self.kern.update_gradients_full(dL_dKmm, self.Z, None)
target = self.kern.gradient.copy() target = self.kern.gradient.copy()
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict) self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.kern.gradient += target self.kern.gradient += target
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z) self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations( self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X) self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else: else:
#gradients wrt kernel #gradients wrt kernel
@ -75,10 +75,9 @@ class SparseGP(GP):
target += self.kern.gradient target += self.kern.gradient
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None) self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern.gradient += target self.kern.gradient += target
#gradients wrt Z #gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X) self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, full_cov=False): def _raw_predict(self, Xnew, full_cov=False):
""" """
View file
@ -89,7 +89,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
likelihood = GPy.likelihoods.Bernoulli() likelihood = GPy.likelihoods.Bernoulli()
laplace_inf = GPy.inference.latent_function_inference.Laplace() laplace_inf = GPy.inference.latent_function_inference.Laplace()
kernel = GPy.kern.rbf(1) kernel = GPy.kern.RBF(1)
# Model definition # Model definition
m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf) m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)
View file
@ -0,0 +1,80 @@
import numpy as np
import pylab as pb
import GPy
pb.ion()
pb.close('all')
X1 = np.arange(3)[:,None]
X2 = np.arange(4)[:,None]
I1 = np.zeros_like(X1)
I2 = np.ones_like(X2)
_X = np.vstack([ X1, X2 ])
_I = np.vstack([ I1, I2 ])
X = np.hstack([ _X, _I ])
Y1 = np.sin(X1/8.)
Y2 = np.cos(X2/8.)
Bias = GPy.kern.Bias(1,active_dims=[0])
Coreg = GPy.kern.Coregionalize(1,2,active_dims=[1])
K = Bias.prod(Coreg,name='X')
#K.coregion.W = 0
#print K.coregion.W
#print Bias.K(_X,_X)
#print K.K(X,X)
#pb.matshow(K.K(X,X))
Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")]
kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=2,kernels_list=Mlist,name='H')
kern.B.W = 0
kern.B.kappa = 1.
#kern.B.W.fix()
#kern.B.kappa.fix()
#m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern)
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], kernel=kern)
#m.optimize()
m.checkgrad(verbose=1)
fig = pb.figure()
ax0 = fig.add_subplot(211)
ax1 = fig.add_subplot(212)
slices = GPy.util.multioutput.get_slices([Y1,Y2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0)
#m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1)
"""
X1 = 100 * np.random.rand(100)[:,None]
X2 = 100 * np.random.rand(100)[:,None]
#X1.sort()
#X2.sort()
Y1 = np.sin(X1/10.) + np.random.rand(100)[:,None]
Y2 = np.cos(X2/10.) + np.random.rand(100)[:,None]
Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")]
kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=12,kernels_list=Mlist,name='H')
m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern)
m.optimize()
fig = pb.figure()
ax0 = fig.add_subplot(211)
ax1 = fig.add_subplot(212)
slices = GPy.util.multioutput.get_slices([Y1,Y2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0)
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1)
"""
View file
@ -324,14 +324,14 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
likelihood_list = [Gaussian(x, normalize=True) for x in Ylist]
k = kern.Linear(Q, ARD=True) + kern.Bias(Q, _np.exp(-2)) + kern.White(Q, _np.exp(-2)) #Ylist = [Ylist[0]]
m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))]
m.ensure_default_constraints() m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw)
for i, bgplvm in enumerate(m.bgplvms): m['.*noise'] = [Y.var()/500. for Y in Ylist]
m['{}_noise'.format(i)] = bgplvm.likelihood.Y.var() / 500. #for i, Y in enumerate(Ylist):
# m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500.
if optimize: if optimize:
print "Optimizing Model:" print "Optimizing Model:"
View file
@ -318,7 +318,7 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
Y /= Y.std() Y /= Y.std()
if kernel_type == 'linear': if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1) kernel = GPy.kern.Linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv': elif kernel_type == 'rbf_inv':
kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1) kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else: else:
@ -357,7 +357,7 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
Y /= Y.std() Y /= Y.std()
if kernel_type == 'linear': if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1) kernel = GPy.kern.Linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv': elif kernel_type == 'rbf_inv':
kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1) kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else: else:
@ -468,7 +468,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
"""Run a 1D example of a sparse GP regression with uncertain inputs.""" """Run a 1D example of a sparse GP regression with uncertain inputs."""
fig, axes = pb.subplots(1, 2, figsize=(12, 5)) fig, axes = pb.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
# sample inputs and outputs # sample inputs and outputs
S = np.ones((20, 1)) S = np.ones((20, 1))
View file
@ -27,8 +27,8 @@ etc.
from exact_gaussian_inference import ExactGaussianInference from exact_gaussian_inference import ExactGaussianInference
from laplace import Laplace from laplace import Laplace
expectation_propagation = 'foo' # TODO
from GPy.inference.latent_function_inference.var_dtc import VarDTC from GPy.inference.latent_function_inference.var_dtc import VarDTC
from expectation_propagation import EP
from dtc import DTC from dtc import DTC
from fitc import FITC from fitc import FITC
View file
@ -19,7 +19,7 @@ class DTC(object):
def __init__(self): def __init__(self):
self.const_jitter = 1e-6 self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this! #TODO: MAX! fix this!
@ -78,11 +78,9 @@ class DTC(object):
Uv = np.dot(U, v) Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta**2 dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta**2
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T} dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
#update gradients grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object #construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
@ -158,11 +156,8 @@ class vDTC(object):
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2 dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
dL_dR -=beta*trace_term/num_data dL_dR -=beta*trace_term/num_data
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T} dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object #construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
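The pattern repeated throughout this commit: inference methods no longer call likelihood.update_gradients themselves; they return 'dL_dthetaL' in grad_dict and the model routes it. For a Gaussian likelihood, exact_inference_gradients presumably just chain-rules dL_dR (the derivative w.r.t. the diagonal noise matrix) into the single noise-variance gradient; a sketch under that assumption, not the GPy source:

import numpy as np

def exact_inference_gradients(dL_dR):
    # Gaussian likelihood: R = sigma2 * I, so dL/dsigma2 = sum_i dL/dR_ii
    return np.atleast_1d(dL_dR.sum())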
View file
@ -3,6 +3,7 @@
from posterior import Posterior from posterior import Posterior
from ...util.linalg import pdinv, dpotrs, tdot from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag
import numpy as np import numpy as np
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
@ -41,7 +42,9 @@ class ExactGaussianInference(object):
K = kern.K(X) K = kern.K(X)
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata)) Ky = K.copy()
diag.add(Ky, likelihood.gaussian_variance(Y, Y_metadata))
Wi, LW, LWi, W_logdet = pdinv(Ky)
alpha, _ = dpotrs(LW, YYT_factor, lower=1) alpha, _ = dpotrs(LW, YYT_factor, lower=1)
@ -49,9 +52,6 @@ class ExactGaussianInference(object):
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi) dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
#TODO: does this really live here? dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
likelihood.update_gradients(np.diag(dL_dK))
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
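For reference, the quantities assembled above in self-contained numpy (a plain Cholesky stands in for pdinv; one output column, unit RBF kernel):

import numpy as np

N = 30
X = np.linspace(0., 5., N)[:, None]
Y = np.sin(X)
K = np.exp(-0.5 * (X - X.T) ** 2)
Ky = K + 0.1 * np.eye(N)                  # Gaussian noise added on the diagonal
L = np.linalg.cholesky(Ky)
alpha = np.linalg.solve(L.T, np.linalg.solve(L, Y))        # Ky^{-1} Y
W_logdet = 2. * np.sum(np.log(np.diag(L)))
log_marginal = 0.5 * (-N * np.log(2. * np.pi) - W_logdet - np.sum(alpha * Y))
Wi = np.linalg.solve(L.T, np.linalg.solve(L, np.eye(N)))   # Ky^{-1}
dL_dK = 0.5 * (np.dot(alpha, alpha.T) - Y.shape[1] * Wi)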
View file
@ -1,7 +1,7 @@
import numpy as np import numpy as np
from scipy import stats from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs
from ..util.linalg import pdinv,mdot,jitchol,chol_inv,DSYR,tdot,dtrtrs from posterior import Posterior
from likelihood import likelihood log_2_pi = np.log(2*np.pi)
class EP(object): class EP(object):
def __init__(self, epsilon=1e-6, eta=1., delta=1.): def __init__(self, epsilon=1e-6, eta=1., delta=1.):
@ -28,30 +28,30 @@ class EP(object):
K = kern.K(X) K = kern.K(X)
mu_tilde, tau_tilde = self.expectation_propagation() mu, Sigma, mu_tilde, tau_tilde, Z_hat = self.expectation_propagation(K, Y, likelihood, Y_metadata)
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde) Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))
alpha, _ = dpotrs(LW, mu_tilde, lower=1) alpha, _ = dpotrs(LW, mu_tilde, lower=1)
log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat??
dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi) dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi)
#TODO: what about derivatives of the likelihood parameters? dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK} return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
def expectation_propagation(self, K, Y, Y_metadata, likelihood) def expectation_propagation(self, K, Y, likelihood, Y_metadata):
num_data, data_dim = Y.shape num_data, data_dim = Y.shape
assert data_dim == 1, "This EP method only works for 1D outputs" assert data_dim == 1, "This EP method only works for 1D outputs"
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.num_data) mu = np.zeros(num_data)
Sigma = K.copy() Sigma = K.copy()
#Initial values - Marginal moments #Initial values - Marginal moments
@ -61,33 +61,32 @@ class EP(object):
#initial values - Gaussian factors #initial values - Gaussian factors
if self.old_mutilde is None: if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data, num_data)) tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
else: else:
assert self.old_mutilde.size == num_data, "data size mismatch: did you change the data? try resetting!" assert self.old_mutilde.size == num_data, "data size mismatch: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde tau_tilde = v_tilde/mu_tilde
#Approximation #Approximation
epsilon_np1 = self.epsilon + 1. tau_diff = self.epsilon + 1.
epsilon_np2 = self.epsilon + 1. v_diff = self.epsilon + 1.
iterations = 0 iterations = 0
while (epsilon_np1 > self.epsilon) or (epsilon_np2 > self.epsilon): while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
update_order = np.random.permutation(num_data) update_order = np.random.permutation(num_data)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i] tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i]
v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i] v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
#Marginal moments #Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match(Y[i], tau_cav, v_cav, Y_metadata=(None if Y_metadata is None else Y_metadata[i])) Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
#Site parameters update #Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i]) delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i]) delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
tau_tilde[i] += delta_tau tau_tilde[i] += delta_tau
v_tilde[i] += delta_v v_tilde[i] += delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
DSYR(Sigma, Sigma[:,i].copy(), -Delta_tau/(1.+ Delta_tau*Sigma[i,i])) DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
mu = np.dot(Sigma, v_tilde) mu = np.dot(Sigma, v_tilde)
iterations += 1
#(re) compute Sigma and mu using full Cholesky decomposition #(re) compute Sigma and mu using full Cholesky decomposition
tau_tilde_root = np.sqrt(tau_tilde) tau_tilde_root = np.sqrt(tau_tilde)
@ -99,10 +98,14 @@ class EP(object):
mu = np.dot(Sigma,v_tilde) mu = np.dot(Sigma,v_tilde)
#monitor convergence #monitor convergence
epsilon_np1 = np.mean(np.square(tau_tilde-tau_tilde_old)) if iterations>0:
epsilon_np2 = np.mean(np.square(v_tilde-v_tilde_old)) tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy() tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy() v_tilde_old = v_tilde.copy()
return mu, Sigma, mu_tilde, tau_tilde iterations += 1
mu_tilde = v_tilde/tau_tilde
return mu, Sigma, mu_tilde, tau_tilde, Z_hat
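The loop above is standard power/fractional EP (with fraction eta and damping delta). In the code's notation, the cavity, site and rank-one posterior updates it performs are

\tau_{\setminus i} = \Sigma_{ii}^{-1} - \eta\,\tilde{\tau}_i, \qquad v_{\setminus i} = \mu_i\,\Sigma_{ii}^{-1} - \eta\,\tilde{v}_i

\Delta\tau = \frac{\delta}{\eta}\left(\hat{\sigma}_i^{-2} - \Sigma_{ii}^{-1}\right), \qquad \Delta v = \frac{\delta}{\eta}\left(\frac{\hat{\mu}_i}{\hat{\sigma}_i^2} - \frac{\mu_i}{\Sigma_{ii}}\right)

\Sigma \leftarrow \Sigma - \frac{\Delta\tau}{1 + \Delta\tau\,\Sigma_{ii}}\,\Sigma_{:,i}\,\Sigma_{:,i}^{\top}, \qquad \mu = \Sigma\,\tilde{v}

with \tilde{\mu} = \tilde{v}/\tilde{\tau} recovered at the end, matching the new five-value return.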
View file
@ -3,6 +3,7 @@
from posterior import Posterior from posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
from ...util import diag
import numpy as np import numpy as np
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
@ -14,15 +15,9 @@ class FITC(object):
the posterior. the posterior.
""" """
def __init__(self): const_jitter = 1e-6
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with FITC. Try varDTC."
#TODO: MAX! fix this!
from ...util.misc import param_to_array
Y = param_to_array(Y)
num_inducing, _ = Z.shape num_inducing, _ = Z.shape
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
@ -38,6 +33,7 @@ class FITC(object):
U = Knm U = Knm
#factor Kmm #factor Kmm
diag.add(Kmm, self.const_jitter)
Kmmi, L, Li, _ = pdinv(Kmm) Kmmi, L, Li, _ = pdinv(Kmm)
#compute beta_star, the effective noise precision #compute beta_star, the effective noise precision
@ -81,11 +77,8 @@ class FITC(object):
dL_dU *= beta_star dL_dU *= beta_star
dL_dU -= 2.*KiU*dL_dR dL_dU -= 2.*KiU*dL_dR
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T} dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object #construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
View file
@ -52,15 +52,13 @@ class Laplace(object):
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat self.f_hat = f_hat
self.Ki_fhat = Ki_fhat
self.K = K.copy()
#Compute hessian and other variables at mode #Compute hessian and other variables at mode
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata) log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
kern.update_gradients_full(dL_dK, X)
likelihood.update_gradients(dL_dthetaL)
self._previous_Ki_fhat = Ki_fhat.copy() self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK} return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None): def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
""" """
View file
@ -2,7 +2,8 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior from posterior import Posterior
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
import numpy as np import numpy as np
from ...util.misc import param_to_array from ...util.misc import param_to_array
@ -47,7 +48,7 @@ class VarDTC(object):
def get_VVTfactor(self, Y, prec): def get_VVTfactor(self, Y, prec):
return Y * prec # TODO cache this, and make it efficient return Y * prec # TODO cache this, and make it efficient
def inference(self, kern, X, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
psi0 = kern.psi0(Z, X) psi0 = kern.psi0(Z, X)
@ -64,7 +65,7 @@ class VarDTC(object):
_, output_dim = Y.shape _, output_dim = Y.shape
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.gaussian_variance(Y, Y_metadata), 1e-6)
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = self.get_YYTfactor(Y) #self.YYTfactor = self.get_YYTfactor(Y)
#VVT_factor = self.get_VVTfactor(self.YYTfactor, beta) #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
@ -73,13 +74,14 @@ class VarDTC(object):
trYYT = self.get_trYYT(Y) trYYT = self.get_trYYT(Y)
# do the inference: # do the inference:
het_noise = beta.size < 1 het_noise = beta.size > 1
num_inducing = Z.shape[0] num_inducing = Z.shape[0]
num_data = Y.shape[0] num_data = Y.shape[0]
# kernel computations, using BGPLVM notation # kernel computations, using BGPLVM notation
Kmm = kern.K(Z)
Lm = jitchol(Kmm+np.eye(Z.shape[0])*self.const_jitter) Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
# The rather complex computations of A # The rather complex computations of A
if uncertain_inputs: if uncertain_inputs:
@ -132,28 +134,28 @@ class VarDTC(object):
# log marginal likelihood # log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit) psi0, A, LB, trYYT, data_fit, Y)
#put the gradients in the right places #put the gradients in the right places
partial_for_likelihood = _compute_partial_for_likelihood(likelihood, dL_dR = _compute_dL_dR(likelihood,
het_noise, uncertain_inputs, LB, het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta, psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT) data_fit, num_data, output_dim, trYYT, Y)
#likelihood.update_gradients(partial_for_likelihood) dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata)
if uncertain_inputs: if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm, grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0, 'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1, 'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2, 'dL_dpsi2':dL_dpsi2,
'partial_for_likelihood':partial_for_likelihood} 'dL_dthetaL':dL_dthetaL}
else: else:
grad_dict = {'dL_dKmm': dL_dKmm, grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0, 'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1, 'dL_dKnm':dL_dpsi1,
'partial_for_likelihood':partial_for_likelihood} 'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction #get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop? #TODO: do we really want to do this in the loop?
@ -168,7 +170,6 @@ class VarDTC(object):
Bi, _ = dpotri(LB, lower=1) Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi) symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0] Bi = -dpotri(LB, lower=1)[0]
from ...util import diag
diag.add(Bi, 1) diag.add(Bi, 1)
woodbury_inv = backsub_both_sides(Lm, Bi) woodbury_inv = backsub_both_sides(Lm, Bi)
@ -207,7 +208,7 @@ class VarDTCMissingData(object):
self._subarray_indices = [[slice(None),slice(None)]] self._subarray_indices = [[slice(None),slice(None)]]
return [Y], [(Y**2).sum()] return [Y], [(Y**2).sum()]
def inference(self, kern, X, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
psi0_all = kern.psi0(Z, X) psi0_all = kern.psi0(Z, X)
@ -220,7 +221,7 @@ class VarDTCMissingData(object):
psi2_all = None psi2_all = None
Ys, traces = self._Y(Y) Ys, traces = self._Y(Y)
beta_all = 1./np.fmax(likelihood.variance, 1e-6) beta_all = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
het_noise = beta_all.size != 1 het_noise = beta_all.size != 1
import itertools import itertools
@ -231,13 +232,14 @@ class VarDTCMissingData(object):
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing)) dL_dpsi2_all = np.zeros((Y.shape[0], num_inducing, num_inducing))
partial_for_likelihood = 0 dL_dR = 0
woodbury_vector = np.zeros((num_inducing, Y.shape[1])) woodbury_vector = np.zeros((num_inducing, Y.shape[1]))
woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1])) woodbury_inv_all = np.zeros((num_inducing, num_inducing, Y.shape[1]))
dL_dKmm = 0 dL_dKmm = 0
log_marginal = 0 log_marginal = 0
Kmm = kern.K(Z) Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
#factor Kmm #factor Kmm
Lm = jitchol(Kmm) Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm) if uncertain_inputs: LmInv = dtrtri(Lm)
@ -303,10 +305,10 @@ class VarDTCMissingData(object):
# log marginal likelihood # log marginal likelihood
log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, log_marginal += _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit) psi0, A, LB, trYYT, data_fit, Y)
#put the gradients in the right places #put the gradients in the right places
partial_for_likelihood += _compute_partial_for_likelihood(likelihood, dL_dR += _compute_dL_dR(likelihood,
het_noise, uncertain_inputs, LB, het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta, psi0, psi1, beta,
@ -323,22 +325,23 @@ class VarDTCMissingData(object):
Bi, _ = dpotri(LB, lower=1) Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi) symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0] Bi = -dpotri(LB, lower=1)[0]
from ...util import diag
diag.add(Bi, 1) diag.add(Bi, 1)
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None] woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
# gradients: # gradients:
if uncertain_inputs: if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm, grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0_all, 'dL_dpsi0':dL_dpsi0_all,
'dL_dpsi1':dL_dpsi1_all, 'dL_dpsi1':dL_dpsi1_all,
'dL_dpsi2':dL_dpsi2_all, 'dL_dpsi2':dL_dpsi2_all,
'partial_for_likelihood':partial_for_likelihood} 'dL_dthetaL':dL_dthetaL}
else: else:
grad_dict = {'dL_dKmm': dL_dKmm, grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0_all, 'dL_dKdiag':dL_dpsi0_all,
'dL_dKnm':dL_dpsi1_all, 'dL_dKnm':dL_dpsi1_all,
'partial_for_likelihood':partial_for_likelihood} 'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction #get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop? #TODO: do we really want to do this in the loop?
@ -384,40 +387,41 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
return dL_dpsi0, dL_dpsi1, dL_dpsi2 return dL_dpsi0, dL_dpsi1, dL_dpsi2
def _compute_partial_for_likelihood(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT): def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y):
# the partial derivative vector for the likelihood # the partial derivative vector for the likelihood
if likelihood.size == 0: if likelihood.size == 0:
# save computation here. # save computation here.
partial_for_likelihood = None dL_dR = None
elif het_noise: elif het_noise:
if uncertain_inputs: if uncertain_inputs:
raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented" raise NotImplementedError, "heteroscedastic derivatives with uncertain inputs not implemented"
else: else:
from ...util.linalg import chol_inv #from ...util.linalg import chol_inv
LBi = chol_inv(LB) #LBi = chol_inv(LB)
LBi, _ = dtrtrs(LB,np.eye(LB.shape[0]))
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0) Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0) _LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
partial_for_likelihood = -0.5 * beta + 0.5 * likelihood.V**2 dL_dR = -0.5 * beta + 0.5 * (beta*Y)**2
partial_for_likelihood += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2 dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2 dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
partial_for_likelihood += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * likelihood.Y * beta**2
partial_for_likelihood += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * Y * beta**2
dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
else: else:
# likelihood is not heteroscedastic # likelihood is not heteroscedastic
partial_for_likelihood = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2 dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
partial_for_likelihood += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta) dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
partial_for_likelihood += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit) dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
return partial_for_likelihood return dL_dR
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit): def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit,Y):
#compute log marginal likelihood #compute log marginal likelihood
if het_noise: if het_noise:
lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(likelihood.V * likelihood.Y) lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * np.square(Y).sum(axis=-1))
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A)) lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A))
else: else:
lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A)) lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
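Note the heteroscedastic switch was also corrected to beta.size > 1 (it previously read beta.size < 1, which is never true for a numpy array). The convention in plain numpy:

import numpy as np

variance = np.array([0.1])               # homoscedastic: one shared variance
beta = 1. / np.fmax(variance, 1e-6)
print beta.size > 1                      # False

variance = 0.1 + np.random.rand(50, 1)   # heteroscedastic: one variance per datum
beta = 1. / np.fmax(variance, 1e-6)
print beta.size > 1                      # True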
View file
@ -1,49 +1,51 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np import numpy as np
import itertools import itertools
from linear import Linear from ...util.caching import Cache_this
from ...core.parameterization import Parameterized from kern import CombinationKernel
from ...core.parameterization.param import Param
from kern import Kern
class Add(Kern): class Add(CombinationKernel):
def __init__(self, subkerns, tensor): """
assert all([isinstance(k, Kern) for k in subkerns]) Add the given list of kernels together.
if tensor: Propagates gradients through.
input_dim = sum([k.input_dim for k in subkerns])
self.input_slices = []
n = 0
for k in subkerns:
self.input_slices.append(slice(n, n+k.input_dim))
n += k.input_dim
else:
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
input_dim = subkerns[0].input_dim
self.input_slices = [slice(None) for k in subkerns]
super(Add, self).__init__(input_dim, 'add')
self.add_parameters(*subkerns)
This kernel takes over the active dims of the subkernels passed in.
"""
def __init__(self, subkerns, name='add'):
super(Add, self).__init__(subkerns, name)
def K(self, X, X2=None): @Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
""" """
Compute the kernel function. Add all kernels together.
If a list of parts (of this kernel!) `which_parts` is given, only
:param X: the first set of inputs to the kernel the parts of the list are taken to compute the covariance.
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
handles this as X2 == X.
""" """
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
if X2 is None: if which_parts is None:
return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)]) which_parts = self.parts
else: elif not isinstance(which_parts, (list, tuple)):
return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) # if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
def update_gradients_full(self, dL_dK, X): @Cache_this(limit=2, force_kwargs=['which_parts'])
[p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)] def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X. """Compute the gradient of the objective function with respect to X.
@ -55,92 +57,77 @@ class Add(Kern):
:param X2: Observed data inputs (optional, defaults to X) :param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)""" :type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X) target = np.zeros(X.shape)
if X2 is None: [target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
return target return target
def Kdiag(self, X): def gradients_X_diag(self, dL_dKdiag, X):
assert X.shape[1] == self.input_dim target = np.zeros(X.shape)
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]) [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0) return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0) psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms # compute the "cross" terms
from white import White from static import White, Bias
from rbf import RBF from rbf import RBF
#from rbf_inv import RBFInv #from rbf_inv import RBFInv
from bias import Bias
from linear import Linear from linear import Linear
#ffrom fixed import Fixed #ffrom fixed import Fixed
for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2): for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn't combine with anything # white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White): if isinstance(p1, White) or isinstance(p2, White):
pass pass
# rbf X bias # rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)): elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2]) tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)): elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1]) tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else: else:
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2 return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import White from static import White, Bias
from rbf import RBF for p1 in self.parts:
#from rbf_inv import RBFInv
#from bias import Bias
from linear import Linear
#ffrom fixed import Fixed
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2! #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy() eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices): for p2 in self.parts:
if p2 is p1: if p2 is p1:
continue continue
if isinstance(p2, White): if isinstance(p2, White):
continue continue
elif isinstance(p2, Bias): elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else: else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import White from static import White, Bias
from rbf import RBF
#from rbf_inv import rbfinv
from bias import Bias
from linear import Linear
#ffrom fixed import fixed
target = np.zeros(Z.shape) target = np.zeros(Z.shape)
for p1, is1 in zip(self._parameters_, self.input_slices): for p1 in self.parts:
#compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2! #compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy() eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices): for p2 in self.parts:
if p2 is p1: if p2 is p1:
continue continue
if isinstance(p2, White): if isinstance(p2, White):
@ -148,63 +135,39 @@ class Add(Kern):
elif isinstance(p2, Bias): elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target += p1.gradients_z_variational(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
return target return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import white from static import White, Bias
from rbf import rbf target_mu = np.zeros(variational_posterior.shape)
#from rbf_inv import rbfinv target_S = np.zeros(variational_posterior.shape)
#from bias import bias for p1 in self._parameters_:
from linear import linear
#ffrom fixed import fixed
target_mu = np.zeros(mu.shape)
target_S = np.zeros(S.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
#compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2! #compute the effective dL_dpsi1. extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy() eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices): for p2 in self._parameters_:
if p2 is p1: if p2 is p1:
continue continue
if isinstance(p2, white): if isinstance(p2, White):
continue continue
elif isinstance(p2, bias): elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
target_mu += a target_mu += a
target_S += b target_S += b
return target_mu, target_S return target_mu, target_S
def input_sensitivity(self):
in_sen = np.zeros((self.num_params, self.input_dim))
for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)):
in_sen[i, i_s] = p.input_sensitivity()
return in_sen
def _getstate(self): def _getstate(self):
""" """
Get the current state of the class, Get the current state of the class,
here just all the indices, rest can get recomputed here just all the indices, rest can get recomputed
""" """
return Parameterized._getstate(self) + [#self._parameters_, return super(Add, self)._getstate()
self.input_dim,
self.input_slices,
self._param_slices_
]
def _setstate(self, state): def _setstate(self, state):
self._param_slices_ = state.pop() super(Add, self)._setstate(state)
self.input_slices = state.pop()
self.input_dim = state.pop()
Parameterized._setstate(self, state)
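With Add now a CombinationKernel, each summand keeps its own active_dims and which_parts restricts evaluation to a subset of the sum. A usage sketch (assuming '+' still builds an Add and parts is exposed as used above):

import numpy as np
import GPy

k = GPy.kern.RBF(1, active_dims=[0]) + GPy.kern.Linear(1, active_dims=[1])
X = np.random.rand(10, 2)
K_full = k.K(X)                          # sum over both parts
K_rbf = k.K(X, which_parts=k.parts[0])   # covariance of the RBF summand only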
View file
@ -17,9 +17,9 @@ class Brownian(Kern):
:param variance: :param variance:
:type variance: float :type variance: float
""" """
def __init__(self, input_dim=1, variance=1., name='Brownian'): def __init__(self, input_dim=1, variance=1., active_dims=None, name='Brownian'):
assert input_dim==1, "Brownian motion in 1D only" assert input_dim==1, "Brownian motion in 1D only"
super(Brownian, self).__init__(input_dim, name) super(Brownian, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp()) self.variance = Param('variance', variance, Logexp())
self.add_parameters(self.variance) self.add_parameters(self.variance)
View file
@ -34,8 +34,8 @@ class Coregionalize(Kern):
.. note: see coregionalization examples in GPy.examples.regression for some usage. .. note: see coregionalization examples in GPy.examples.regression for some usage.
""" """
def __init__(self, output_dim, rank=1, W=None, kappa=None, name='coregion'): def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, active_dims=None, name='coregion'):
super(Coregionalize, self).__init__(input_dim=1, name=name) super(Coregionalize, self).__init__(input_dim, active_dims, name=name)
self.output_dim = output_dim self.output_dim = output_dim
self.rank = rank self.rank = rank
if self.rank>output_dim: if self.rank>output_dim:
View file
@ -2,8 +2,9 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from kern import Kern, CombinationKernel
import numpy as np import numpy as np
import itertools
def index_to_slices(index): def index_to_slices(index):
""" """
@ -31,78 +32,109 @@ def index_to_slices(index):
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret return ret
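For reference, index_to_slices groups contiguous runs of the index column, returning one slice list per index value. A small sketch of the assumed behaviour:

import numpy as np
# index 0,0,1,1,1,0  ->  [[slice(0, 2), slice(5, 6)], [slice(2, 5)]]
print index_to_slices(np.array([0, 0, 1, 1, 1, 0]))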
class IndependentOutputs(Kern): class IndependentOutputs(CombinationKernel):
""" """
A kernel which can reopresent several independent functions. A kernel which can represent several independent functions.
this kernel 'switches off' parts of the matrix where the output indexes are different. this kernel 'switches off' parts of the matrix where the output indexes are different.
The index of the functions is given by the last column in the input X The index of the functions is given by the last column in the input X
the rest of the columns of X are passed to the underlying kernel for computation (in blocks). the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
:param kernels: either a kernel, or list of kernels to work with. If it is a list of kernels
the indices in the index_dim, index the kernels you gave!
""" """
def __init__(self, kern, name='independ'): def __init__(self, kernels, index_dim=-1, name='independ'):
super(IndependentOutputs, self).__init__(kern.input_dim+1, name) assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indices"
self.kern = kern if not isinstance(kernels, list):
self.add_parameters(self.kern) self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim
self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
def K(self,X ,X2=None): def K(self,X ,X2=None):
X, slices = X[:,:-1], index_to_slices(X[:,-1]) slices = index_to_slices(X[:,self.index_dim])
if X2 is None: if X2 is None:
target = np.zeros((X.shape[0], X.shape[0])) target = np.zeros((X.shape[0], X.shape[0]))
[[np.copyto(target[s,s], self.kern.K(X[s], None)) for s in slices_i] for slices_i in slices] [[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)]
else: else:
X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
[[[np.copyto(target[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
return target return target
def Kdiag(self,X): def Kdiag(self,X):
X, slices = X[:,:-1], index_to_slices(X[:,-1]) slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape[0]) target = np.zeros(X.shape[0])
[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices] [[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
return target return target
def update_gradients_full(self,dL_dK,X,X2=None): def update_gradients_full(self,dL_dK,X,X2=None):
target = np.zeros(self.kern.size) slices = index_to_slices(X[:,self.index_dim])
def collate_grads(dL, X, X2): if self.single_kern: target = np.zeros(self.kern.size)
self.kern.update_gradients_full(dL,X,X2) else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
self.kern._collect_gradient(target) def collate_grads(kern, i, dL, X, X2):
kern.update_gradients_full(dL,X,X2)
X,slices = X[:,:-1],index_to_slices(X[:,-1]) if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
if X2 is None: if X2 is None:
[[collate_grads(dL_dK[s,s], X[s], None) for s in slices_i] for slices_i in slices] [[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))]
else: else:
X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1]) slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))]
if self.single_kern: self.kern.gradient = target
self.kern._set_gradient(target) else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros_like(X) target = np.zeros(X.shape)
X, slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None: if X2 is None:
[[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s],X[s],None)) for s in slices_i] for slices_i in slices] # TODO: make use of index_to_slices
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(self.kerns, slices)]
#slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
#import ipdb;ipdb.set_trace()
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)]
else: else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1]) values = np.unique(X[:,self.index_dim])
[[[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)] slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
for kern, s, s2 in zip(self.kerns, slices, slices2)]
# TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim])
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
X, slices = X[:,:-1], index_to_slices(X[:,-1]) slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape) target = np.zeros(X.shape)
[[np.copyto(target[s,:-1], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices] [[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
return target return target
def update_gradients_diag(self,dL_dKdiag,X,target): def update_gradients_diag(self, dL_dKdiag, X):
target = np.zeros(self.kern.size) slices = index_to_slices(X[:,self.index_dim])
def collate_grads(dL, X): if self.single_kern: target = np.zeros(self.kern.size)
self.kern.update_gradients_diag(dL,X) else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
self.kern._collect_gradient(target) def collate_grads(kern, i, dL, X):
X,slices = X[:,:-1],index_to_slices(X[:,-1]) kern.update_gradients_diag(dL,X)
[[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices] if self.single_kern: target[:] += kern.gradient
self.kern._set_gradient(target) else: target[i][:] += kern.gradient
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))]
if self.single_kern: self.kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
class Hierarchical(CombinationKernel):
    """
    A kernel which can represent a simple hierarchical model.
@ -113,7 +145,7 @@ class Hierarchical(Kern):
    The index of the functions is given by additional columns in the input X.
    """
    def __init__(self, kerns, name='hierarchy'):
        assert all([k.input_dim==kerns[0].input_dim for k in kerns])
        super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
        self.kerns = kerns
View file
@ -3,28 +3,59 @@
import sys
import numpy as np
from ...core.parameterization.parameterized import Parameterized
from kernel_slice_operations import KernCallsViaSlicerMeta
from ...util.caching import Cache_this

class Kern(Parameterized):
    #===========================================================================
    # This adds input slice support. The rather ugly code for slicing can be
    # found in kernel_slice_operations
    __metaclass__ = KernCallsViaSlicerMeta
    #===========================================================================
    _debug = False

    def __init__(self, input_dim, active_dims, name, *a, **kw):
        """
        The base class for a kernel: a positive definite function
        which forms the core of a covariance function (kernel).

        :param int input_dim: the number of input dimensions to the function
        :param array-like|slice active_dims: list of indices of the dimensions this kernel works on

        Do not instantiate.
        """
        super(Kern, self).__init__(name=name, *a, **kw)
        self.active_dims = active_dims if active_dims is not None else slice(0, input_dim)
        self.input_dim = input_dim
        assert isinstance(self.active_dims, (slice, list, tuple, np.ndarray)), 'active_dims needs to be an array-like or slice object over dimensions, {} given'.format(self.active_dims.__class__)
        if isinstance(self.active_dims, slice):
            self.active_dims = slice(self.active_dims.start or 0, self.active_dims.stop or self.input_dim, self.active_dims.step or 1)
            active_dim_size = int(np.round((self.active_dims.stop-self.active_dims.start)/self.active_dims.step))
        elif isinstance(self.active_dims, np.ndarray):
            assert self.active_dims.ndim == 1, 'only flat indices allowed, given active_dims.shape={}, provide only indexes to the dimensions of the input'.format(self.active_dims.shape)
            active_dim_size = self.active_dims.size
        else:
            active_dim_size = len(self.active_dims)
        assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dims)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims)
        self._sliced_X = 0

    @Cache_this(limit=10)
    def _slice_X(self, X):
        return X[:, self.active_dims]
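The active_dims normalization above determines which input columns a kernel sees. A minimal standalone sketch of the same logic (plain NumPy; the helper name and values are hypothetical, not GPy API):

import numpy as np

def normalize_active_dims(active_dims, input_dim):
    # Mirror of the constructor logic above: accept a slice or an index
    # array and check it covers exactly input_dim dimensions.
    if active_dims is None:
        active_dims = slice(0, input_dim)
    if isinstance(active_dims, slice):
        active_dims = slice(active_dims.start or 0, active_dims.stop or input_dim, active_dims.step or 1)
        size = int(np.round((active_dims.stop - active_dims.start) / active_dims.step))
    else:
        active_dims = np.asarray(active_dims)
        size = active_dims.size
    assert size == input_dim
    return active_dims

X = np.random.randn(5, 4)                       # 4 input columns
dims = normalize_active_dims(np.array([1, 3]), 2)
print(X[:, dims].shape)                         # (5, 2): the kernel only sees columns 1 and 3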
    def K(self, X, X2):
        """
        Compute the kernel function.

        :param X: the first set of inputs to the kernel
        :param X2: (optional) the second set of arguments to the kernel. If X2
                   is None, this is passed through to the 'part' object, which
                   handles this as X2 == X.
        """
        raise NotImplementedError
    def Kdiag(self, X):
        raise NotImplementedError
    def psi0(self, Z, variational_posterior):
        raise NotImplementedError
@ -34,7 +65,11 @@ class Kern(Parameterized):
        raise NotImplementedError
    def gradients_X(self, dL_dK, X, X2):
        raise NotImplementedError
    def gradients_X_diag(self, dL_dKdiag, X):
        raise NotImplementedError
    def update_gradients_diag(self, dL_dKdiag, X):
        """ update the gradients of all parameters when using only the diagonal elements of the covariance matrix"""
        raise NotImplementedError
    def update_gradients_full(self, dL_dK, X, X2):
@ -89,23 +124,16 @@ class Kern(Parameterized):
        """
        Returns the sensitivity for each dimension of this kernel.
        """
        return np.zeros(self.input_dim)
    def __add__(self, other):
        """ Overloading of the '+' operator. For more control, see self.add """
        return self.add(other)

    def add(self, other, name='add'):
        """
        Add another kernel to this one.

        :param other: the other kernel to be added
        :type other: GPy.kern
@ -113,11 +141,11 @@ class Kern(Parameterized):
        assert isinstance(other, Kern), "only kernels can be added to kernels..."
        from add import Add
        kernels = []
        if isinstance(self, Add): kernels.extend(self._parameters_)
        else: kernels.append(self)
        if isinstance(other, Add): kernels.extend(other._parameters_)
        else: kernels.append(other)
        return Add(kernels, name=name)
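Because `add` extends with the parts of any existing `Add`, chained `+` builds one flat sum rather than a nested tree. A hedged, standalone sketch of just that flattening idea (toy classes, not the GPy objects):

class ToySum(object):
    # Stand-in for Add: holds a flat list of parts.
    def __init__(self, parts): self.parts = parts

def toy_add(a, b):
    # Mirror of the flattening above: merge the parts of existing sums
    # instead of nesting ToySum(ToySum(...), ...).
    parts = list(a.parts) if isinstance(a, ToySum) else [a]
    parts += list(b.parts) if isinstance(b, ToySum) else [b]
    return ToySum(parts)

s = toy_add(toy_add('k1', 'k2'), 'k3')
print(s.parts)   # ['k1', 'k2', 'k3'] -- one flat sum of three kernels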
    def __mul__(self, other):
        """ Here we overload the '*' operator. See self.prod for more information"""
@ -127,9 +155,12 @@ class Kern(Parameterized):
        """
        Shortcut for tensor `prod`.
        """
        assert self.active_dims == range(self.input_dim), "Can only use kernels, which have their input_dims defined from 0"
        assert other.active_dims == range(other.input_dim), "Can only use kernels, which have their input_dims defined from 0"
        other.active_dims += self.input_dim
        return self.prod(other)

    def prod(self, other, name='mul'):
        """
        Multiply two kernels (either on the same space, or on the tensor
        product of the input space).
@ -142,4 +173,60 @@ class Kern(Parameterized):
        """
        assert isinstance(other, Kern), "only kernels can be added to kernels..."
        from prod import Prod
        #kernels = []
        #if isinstance(self, Prod): kernels.extend(self._parameters_)
        #else: kernels.append(self)
        #if isinstance(other, Prod): kernels.extend(other._parameters_)
        #else: kernels.append(other)
        return Prod([self, other], name)
    def _getstate(self):
        """
        Get the current state of the class,
        here just all the indices, rest can get recomputed
        """
        return super(Kern, self)._getstate() + [
                self.active_dims,
                self.input_dim,
                self._sliced_X]

    def _setstate(self, state):
        self._sliced_X = state.pop()
        self.input_dim = state.pop()
        self.active_dims = state.pop()
        super(Kern, self)._setstate(state)
class CombinationKernel(Kern):
    """
    Abstract super class for combination kernels.
    A combination kernel combines (a list of) kernels and works on those.
    Examples are the HierarchicalKernel or Add and Prod kernels.
    """
    def __init__(self, kernels, name, extra_dims=[]):
        """
        Abstract super class for combination kernels.
        A combination kernel combines (a list of) kernels and works on those.
        Examples are the HierarchicalKernel or Add and Prod kernels.

        :param list kernels: List of kernels to combine (can be only one element)
        :param str name: name of the combination kernel
        :param array-like|slice extra_dims: if needed, extra dimensions for the combination kernel to work on
        """
        assert all([isinstance(k, Kern) for k in kernels])
        active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
        input_dim = active_dims.max()+1 + len(extra_dims)
        active_dims = slice(active_dims.max()+1+len(extra_dims))
        # initialize the kernel with the full input_dim
        super(CombinationKernel, self).__init__(input_dim, active_dims, name)
        self.extra_dims = extra_dims
        self.add_parameters(*kernels)

    @property
    def parts(self):
        return self._parameters_

    def input_sensitivity(self):
        in_sen = np.zeros((self.num_params, self.input_dim))
        for i, p in enumerate(self.parts):
            in_sen[i, p.active_dims] = p.input_sensitivity()
        return in_sen
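The union over the parts' active_dims decides how many input columns the combined kernel expects. A runnable check of just that computation (plain NumPy; `np.r_` expands a slice into explicit indices):

import numpy as np
from functools import reduce  # builtin in Python 2, imported here so the snippet also runs on Python 3

# Two hypothetical parts: one on columns 0-1, one on columns [1, 3].
part_dims = [slice(0, 2), np.array([1, 3])]
active = reduce(np.union1d, (np.r_[d] for d in part_dims), np.array([], dtype=int))
print(active)            # [0 1 3]
print(active.max() + 1)  # 4 -- the combined kernel works on a 4-column input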
View file
@ -0,0 +1,134 @@
'''
Created on 11 Mar 2014

@author: maxz
'''
from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np

class KernCallsViaSlicerMeta(ParametersChangedMeta):
    def __call__(self, *args, **kw):
        instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
        instance.K = _slice_wrapper(instance, instance.K)
        instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
        instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
        instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
        instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
        instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
        instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
        instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
        instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
        instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
        instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
        instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
        instance.parameters_changed()
        return instance

def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
    """
    This function wraps the methods of the kernel to make sure all kernels always see their respective input dimensions.
    The different switches are:

        diag: if X2 exists
        derivative: if first arg is dL_dK
        psi_stat: if first 3 args are dL_dpsi0..2
        psi_stat_Z: if first 2 args are dL_dpsi1..2
    """
    if derivative:
        if diag:
            def x_slice_wrapper(dL_dKdiag, X):
                ret_X_not_sliced = ret_X and kern._sliced_X == 0
                if ret_X_not_sliced:
                    ret = np.zeros(X.shape)
                X = kern._slice_X(X) if not kern._sliced_X else X
                # if the return value is of shape X.shape, we need to make sure to return the right shape
                kern._sliced_X += 1
                try:
                    if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
                    else: ret = operation(dL_dKdiag, X)
                except:
                    raise
                finally:
                    kern._sliced_X -= 1
                return ret
        elif psi_stat:
            def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
                ret_X_not_sliced = ret_X and kern._sliced_X == 0
                if ret_X_not_sliced:
                    ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
                Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
                kern._sliced_X += 1
                # if the return value is of shape X.shape, we need to make sure to return the right shape
                try:
                    if ret_X_not_sliced:
                        ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
                        r2 = ret[:2]
                        ret[0] = ret1
                        ret[1] = ret2
                        ret[0][:, kern.active_dims] = r2[0]
                        ret[1][:, kern.active_dims] = r2[1]
                        del r2
                    else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
                except:
                    raise
                finally:
                    kern._sliced_X -= 1
                return ret
        elif psi_stat_Z:
            def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
                ret_X_not_sliced = ret_X and kern._sliced_X == 0
                if ret_X_not_sliced: ret = np.zeros(Z.shape)
                Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
                kern._sliced_X += 1
                try:
                    if ret_X_not_sliced:
                        ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
                    else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
                except:
                    raise
                finally:
                    kern._sliced_X -= 1
                return ret
        else:
            def x_slice_wrapper(dL_dK, X, X2=None):
                ret_X_not_sliced = ret_X and kern._sliced_X == 0
                if ret_X_not_sliced:
                    ret = np.zeros(X.shape)
                X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
                kern._sliced_X += 1
                try:
                    if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2)
                    else: ret = operation(dL_dK, X, X2)
                except:
                    raise
                finally:
                    kern._sliced_X -= 1
                return ret
    else:
        if diag:
            def x_slice_wrapper(X, *args, **kw):
                X = kern._slice_X(X) if not kern._sliced_X else X
                kern._sliced_X += 1
                try:
                    ret = operation(X, *args, **kw)
                except:
                    raise
                finally:
                    kern._sliced_X -= 1
                return ret
        else:
            def x_slice_wrapper(X, X2=None, *args, **kw):
                X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
                kern._sliced_X += 1
                try:
                    ret = operation(X, X2, *args, **kw)
                except: raise
                finally:
                    kern._sliced_X -= 1
                return ret
    x_slice_wrapper._operation = operation
    x_slice_wrapper.__name__ = ("slicer("+operation.__name__
                                +(","+str(bool(diag)) if diag else '')
                                +(','+str(bool(derivative)) if derivative else '')
                                +')')
    x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "")
    return x_slice_wrapper
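Stripped of the re-entrancy counter and return-shape handling, the wrapper is an ordinary closure that slices inputs before delegating. A minimal sketch of that idea (toy decorator and kernel, not the GPy implementation):

import numpy as np

def sliced(active_dims):
    # Toy version of _slice_wrapper: pass only the active columns through.
    def wrap(operation):
        def wrapper(X, X2=None):
            Xs = X[:, active_dims]
            X2s = X2[:, active_dims] if X2 is not None else None
            return operation(Xs, X2s)
        return wrapper
    return wrap

@sliced(active_dims=np.array([0, 2]))
def linear_K(X, X2=None):
    # A linear kernel on whatever columns it is handed.
    X2 = X if X2 is None else X2
    return X.dot(X2.T)

X = np.random.randn(4, 3)
print(linear_K(X).shape)  # (4, 4), computed from columns 0 and 2 only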
View file
@ -34,8 +34,8 @@ class Linear(Kern):
    """
    def __init__(self, input_dim, variances=None, ARD=False, active_dims=None, name='linear'):
        super(Linear, self).__init__(input_dim, active_dims, name)
        self.ARD = ARD
        if not ARD:
            if variances is not None:
@ -147,7 +147,6 @@ class Linear(Kern):
        mu = variational_posterior.mean
        S = variational_posterior.variance
        mu2S = np.square(mu)+S
        _dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
        grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\
               np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance)
View file
@ -31,8 +31,8 @@ class MLP(Kern):
    """
    def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., active_dims=None, name='mlp'):
        super(MLP, self).__init__(input_dim, active_dims, name)
        self.variance = Param('variance', variance, Logexp())
        self.weight_variance = Param('weight_variance', weight_variance, Logexp())
        self.bias_variance = Param('bias_variance', bias_variance, Logexp())
@ -96,12 +96,12 @@ class MLP(Kern):
        vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
        return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)

    def gradients_X_diag(self, dL_dKdiag, X):
        """Gradient of diagonal of covariance with respect to X"""
        self._K_diag_computations(X)
        arg = self._K_diag_asin_arg
        denom = self._K_diag_denom
        #numer = self._K_diag_numer
        return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
View file
@ -10,7 +10,7 @@ from ...core.parameterization.param import Param
from ...core.parameterization.transformations import Logexp

class Periodic(Kern):
    def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name):
        """
        :type input_dim: int
        :param variance: the variance of the Matern kernel
@ -25,7 +25,7 @@ class Periodic(Kern):
        """
        assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
        super(Periodic, self).__init__(input_dim, active_dims, name)
        self.input_dim = input_dim
        self.lower, self.upper = lower, upper
        self.n_freq = n_freq
@ -77,16 +77,17 @@ class PeriodicExponential(Periodic):
    Only defined for input_dim=1.
    """
    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_exponential'):
        super(PeriodicExponential, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)

    def parameters_changed(self):
        self.a = [1./self.lengthscale, 1.]
        self.b = [1]
        self.basis_alpha = np.ones((self.n_basis,))
        self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
        self.basis_phi = np.zeros(self.n_freq * 2)
        self.basis_phi[::2] = -np.pi/2
        self.G = self.Gram_matrix()
        self.Gi = np.linalg.inv(self.G)
@ -100,7 +101,6 @@ class PeriodicExponential(Periodic):
        Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
        return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))

    def update_gradients_full(self, dL_dK, X, X2=None):
        """derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
        if X2 is None: X2 = X
@ -187,15 +187,16 @@ class PeriodicMatern32(Periodic):
    """
    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern32'):
        super(PeriodicMatern32, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)

    def parameters_changed(self):
        self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
        self.b = [1, self.lengthscale**2/3]
        self.basis_alpha = np.ones((self.n_basis,))
        self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
        self.basis_phi = np.zeros(self.n_freq * 2)
        self.basis_phi[::2] = -np.pi/2
        self.G = self.Gram_matrix()
        self.Gi = np.linalg.inv(self.G)
@ -212,8 +213,8 @@ class PeriodicMatern32(Periodic):
        return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))

    #@silence_errors
    def update_gradients_full(self, dL_dK, X, X2):
        """derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
        if X2 is None: X2 = X
        FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -299,16 +300,17 @@ class PeriodicMatern52(Periodic):
    """
    def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern52'):
        super(PeriodicMatern52, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)

    def parameters_changed(self):
        self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2, 3*np.sqrt(5)/self.lengthscale, 1.]
        self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)]
        self.basis_alpha = np.ones((2*self.n_freq,))
        self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
        self.basis_phi = np.zeros(self.n_freq * 2)
        self.basis_phi[::2] = -np.pi/2
        self.G = self.Gram_matrix()
        self.Gi = np.linalg.inv(self.G)
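The vectorized basis construction replaces the old list-comprehension-and-sum idiom. A quick standalone check that the two produce identical arrays (plain NumPy, toy values):

import numpy as np

n_freq, period = 3, 2.*np.pi

# Old idiom: build duplicated frequencies and alternating phases via summed lists.
omega_old = np.array(sum([[i*2*np.pi/period]*2 for i in range(1, n_freq+1)], []))
phi_old = np.array(sum([[-np.pi/2, 0.] for i in range(1, n_freq+1)], []))

# New idiom: repeat() and strided assignment.
omega_new = (2*np.pi*np.arange(1, n_freq+1)/period).repeat(2)
phi_new = np.zeros(n_freq * 2)
phi_new[::2] = -np.pi/2

print(np.allclose(omega_old, omega_new), np.allclose(phi_old, phi_new))  # True True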
View file
@ -1,10 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from kern import CombinationKernel
from ...util.caching import Cache_this
import itertools

class Prod(CombinationKernel):
    """
    Computes the product of 2 kernels
@ -15,49 +17,49 @@ class Prod(Kern):
    :rtype: kernel object
    """
    def __init__(self, kernels, name='mul'):
        assert len(kernels) == 2, 'only implemented for two kernels as of yet'
        super(Prod, self).__init__(kernels, name)

    @Cache_this(limit=2, force_kwargs=['which_parts'])
    def K(self, X, X2=None, which_parts=None):
        assert X.shape[1] == self.input_dim
        if which_parts is None:
            which_parts = self.parts
        elif not isinstance(which_parts, (list, tuple)):
            # if only one part is given
            which_parts = [which_parts]
        return reduce(np.multiply, (p.K(X, X2) for p in which_parts))

    @Cache_this(limit=2, force_kwargs=['which_parts'])
    def Kdiag(self, X, which_parts=None):
        assert X.shape[1] == self.input_dim
        if which_parts is None:
            which_parts = self.parts
        return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))

    def update_gradients_full(self, dL_dK, X, X2=None):
        for k1, k2 in itertools.combinations(self.parts, 2):
            k1.update_gradients_full(dL_dK*k2.K(X, X2), X, X2)
            k2.update_gradients_full(dL_dK*k1.K(X, X2), X, X2)

    def update_gradients_diag(self, dL_dKdiag, X):
        for k1, k2 in itertools.combinations(self.parts, 2):
            k1.update_gradients_diag(dL_dKdiag*k2.Kdiag(X), X)
            k2.update_gradients_diag(dL_dKdiag*k1.Kdiag(X), X)

    def gradients_X(self, dL_dK, X, X2=None):
        target = np.zeros(X.shape)
        for k1, k2 in itertools.combinations(self.parts, 2):
            target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2)
            target += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2)
        return target

    def gradients_X_diag(self, dL_dKdiag, X):
        target = np.zeros(X.shape)
        for k1, k2 in itertools.combinations(self.parts, 2):
            target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
            target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
        return target
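The gradient loop above is just the product rule: for K = K1*K2 (elementwise), the sensitivity flowing into K1's parameters is dL_dK scaled by K2. A standalone numerical check with two toy kernels (plain NumPy; the kernels and parameterization are hypothetical):

import numpy as np

X = np.random.randn(5, 1)
r2 = (X - X.T)**2

def k_rbf(var): return var * np.exp(-0.5 * r2)   # toy RBF, lengthscale fixed at 1
def k_lin(var): return var * X.dot(X.T)          # toy linear kernel

dL_dK = np.random.randn(5, 5)
v1, v2, eps = 1.3, 0.7, 1e-6

# Product rule: dL/dv1 = sum(dL_dK * K2 * dK1/dv1), and dK1/dv1 = K1/v1 here.
analytic = np.sum(dL_dK * k_lin(v2) * k_rbf(v1) / v1)
numeric = np.sum(dL_dK * (k_rbf(v1+eps)*k_lin(v2) - k_rbf(v1-eps)*k_lin(v2))) / (2*eps)
print(np.allclose(analytic, numeric))  # True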
View file
@ -19,9 +19,8 @@ class RBF(Stationary):
        k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
    """
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'):
        super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
        self.weave_options = {}

    def K_of_r(self, r):
@ -67,7 +66,6 @@ class RBF(Stationary):
            else:
                self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
            #from psi2
            self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
            if self.ARD:
@ -76,11 +74,14 @@ class RBF(Stationary):
                self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
        elif isinstance(variational_posterior, variational.NormalPosterior):
            l2 = self.lengthscale**2
            if l2.size != self.input_dim:
                l2 = l2*np.ones(self.input_dim)
            #contributions from psi0:
            self.variance.gradient = np.sum(dL_dpsi0)
            if self._debug:
                num_grad = self.lengthscale.gradient.copy()
            self.lengthscale.gradient = 0.
            #from psi1
@ -92,16 +93,16 @@ class RBF(Stationary):
            else:
                self.lengthscale.gradient += dpsi1_dlength.sum()
            self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
            #from psi2
            S = variational_posterior.variance
            _, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
            if not self.ARD:
                self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2).sum()
            else:
                self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
            if self._debug:
                import ipdb;ipdb.set_trace()
            self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
        else:
@ -122,7 +123,6 @@ class RBF(Stationary):
            return grad
        elif isinstance(variational_posterior, variational.NormalPosterior):
            l2 = self.lengthscale **2
            #psi1
@ -153,6 +153,7 @@ class RBF(Stationary):
            grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
            grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
            grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
            #psi2
            grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
            grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
View file
@ -33,9 +33,9 @@ class SSRBF(Stationary):
    .. Note: this object implements both the ARD and 'spherical' version of the function
    """
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=True, active_dims=None, name='SSRBF'):
        assert ARD==True, "Not Implemented!"
        super(SSRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)
View file
@ -9,8 +9,8 @@ from ...core.parameterization.transformations import Logexp
import numpy as np

class Static(Kern):
    def __init__(self, input_dim, variance, active_dims, name):
        super(Static, self).__init__(input_dim, active_dims, name)
        self.variance = Param('variance', variance, Logexp())
        self.add_parameters(self.variance)
@ -43,8 +43,8 @@ class Static(Kern):

class White(Static):
    def __init__(self, input_dim, variance=1., active_dims=None, name='white'):
        super(White, self).__init__(input_dim, variance, active_dims, name)

    def K(self, X, X2=None):
        if X2 is None:
@ -55,7 +55,7 @@ class White(Static):
    def psi2(self, Z, variational_posterior):
        return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)

    def update_gradients_full(self, dL_dK, X, X2=None):
        self.variance.gradient = np.trace(dL_dK)

    def update_gradients_diag(self, dL_dKdiag, X):
@ -66,8 +66,8 @@ class White(Static):

class Bias(Static):
    def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
        super(Bias, self).__init__(input_dim, variance, active_dims, name)

    def K(self, X, X2=None):
        shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
@ -79,13 +79,41 @@ class Bias(Static):
        self.variance.gradient = dL_dK.sum()

    def update_gradients_diag(self, dL_dKdiag, X):
        self.variance.gradient = dL_dKdiag.sum()

    def psi2(self, Z, variational_posterior):
        ret = np.empty((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
        ret[:] = self.variance**2
        return ret

    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
class Fixed(Static):
    def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='fixed'):
        """
        :param input_dim: the number of input dimensions
        :type input_dim: int
        :param covariance_matrix: the fixed covariance matrix to be scaled
        :type covariance_matrix: np.ndarray
        :param variance: the variance of the kernel
        :type variance: float
        """
        super(Fixed, self).__init__(input_dim, variance, active_dims, name)
        self.fixed_K = covariance_matrix

    def K(self, X, X2):
        return self.variance * self.fixed_K

    def Kdiag(self, X):
        return self.variance * np.diag(self.fixed_K)

    def update_gradients_full(self, dL_dK, X, X2=None):
        self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)

    def update_gradients_diag(self, dL_dKdiag, X):
        self.variance.gradient = np.einsum('i,i', dL_dKdiag, np.diag(self.fixed_K))

    def psi2(self, Z, variational_posterior):
        return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)

    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
        self.variance.gradient = dL_dpsi0.sum()
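Since K = variance * fixed_K, the variance gradient is just the elementwise inner product of dL_dK with the fixed matrix, which is what the einsum computes. A standalone numerical sanity check of that identity (plain NumPy, toy data):

import numpy as np

N = 4
A = np.random.randn(N, N); fixed_K = A.dot(A.T)   # a fixed PSD matrix
dL_dK = np.random.randn(N, N)
var, eps = 2.0, 1e-6

# d/dvar sum(dL_dK * var*fixed_K) = sum(dL_dK * fixed_K) = einsum('ij,ij', ...)
analytic = np.einsum('ij,ij', dL_dK, fixed_K)
numeric = np.sum(dL_dK * ((var+eps)*fixed_K - (var-eps)*fixed_K)) / (2*eps)
print(np.allclose(analytic, numeric))  # True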
View file
@ -41,8 +41,8 @@ class Stationary(Kern):
    """
    def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name):
        super(Stationary, self).__init__(input_dim, active_dims, name)
        self.ARD = ARD
        if not ARD:
            if lengthscale is None:
@ -85,15 +85,19 @@ class Stationary(Kern):
        Compute the Euclidean distance between each row of X and X2, or between
        each pair of rows of X if X2 is None.
        """
        #X, = self._slice_X(X)
        if X2 is None:
            Xsq = np.sum(np.square(X),1)
            r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
            util.diag.view(r2)[:,] = 0. # force diagonal to be zero: sometimes numerically a little negative
            return np.sqrt(r2)
        else:
            #X2, = self._slice_X(X2)
            X1sq = np.sum(np.square(X),1)
            X2sq = np.sum(np.square(X2),1)
            r2 = -2.*np.dot(X, X2.T) + X1sq[:,None] + X2sq[None,:]
            r2[r2<0] = 0. # A bit hacky
            return np.sqrt(r2)
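The clamp matters because the expansion -2*X.X2' + |X|^2 + |X2|^2 can come out slightly negative in floating point for (nearly) coincident points, and np.sqrt then yields NaN. A small standalone demonstration of the failure mode and the fix (plain NumPy; the hard-coded value stands in for what cancellation can leave behind):

import numpy as np

# What the expansion can produce for two nearly identical rows:
r2 = np.array([[-4.4e-16]])
print(np.sqrt(r2))     # [[nan]], plus a RuntimeWarning

r2[r2 < 0] = 0.        # the "bit hacky" clamp from above
print(np.sqrt(r2))     # [[0.]] -- the correct distance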
    @Cache_this(limit=5, ignore_args=())
    def _scaled_dist(self, X, X2=None):
@ -124,7 +128,6 @@ class Stationary(Kern):
        self.lengthscale.gradient = 0.

    def update_gradients_full(self, dL_dK, X, X2=None):
        self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)

        #now the lengthscale gradient(s)
@ -136,7 +139,7 @@ class Stationary(Kern):
            #self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
            tmp = dL_dr*self._inv_dist(X, X2)
            if X2 is None: X2 = X
            self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(self._slice_X(X)[:,q:q+1] - self._slice_X(X2)[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
        else:
            r = self._scaled_dist(X, X2)
            self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@ -176,7 +179,6 @@ class Stationary(Kern):
        ret = np.empty(X.shape, dtype=np.float64)
        [np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
        ret /= self.lengthscale**2
        return ret

    def gradients_X_diag(self, dL_dKdiag, X):
@ -186,8 +188,8 @@ class Stationary(Kern):
        return np.ones(self.input_dim)/self.lengthscale

class Exponential(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
        super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r)
@ -205,8 +207,8 @@ class Matern32(Stationary):
    """
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'):
        super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
@ -249,8 +251,8 @@ class Matern52(Stationary):
        k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
    """
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
        super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
@ -291,8 +293,8 @@ class Matern52(Stationary):

class ExpQuad(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
        super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * np.exp(-0.5 * r**2)
@ -301,8 +303,8 @@ class ExpQuad(Stationary):
        return -r*self.K_of_r(r)

class Cosine(Stationary):
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Cosine'):
        super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)

    def K_of_r(self, r):
        return self.variance * np.cos(r)
@ -322,8 +324,8 @@ class RatQuad(Stationary):
    """
    def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='RatQuad'):
        super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
        self.power = Param('power', power, Logexp())
        self.add_parameters(self.power)
View file
@ -26,13 +26,13 @@ class Sympykern(Kern):
     - to handle multiple inputs, call them x_1, z_1, etc
     - to handle multiple correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
    """
    def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None, active_dims=None):
        if name is None:
            name='sympykern'
        if k is None:
            raise ValueError, "You must provide an argument for the covariance function."
        super(Sympykern, self).__init__(input_dim, active_dims, name)

        self._sp_k = k
@ -116,6 +116,7 @@ class Sympykern(Kern):
        if self.output_dim > 1:
            self.arg_list += self._sp_theta_i + self._sp_theta_j
            self.diag_arg_list += self._sp_theta_i

        # psi_stats aren't yet implemented.
        if False:
            self.compute_psi_stats()
View file
@ -5,3 +5,4 @@ from gamma import Gamma
from poisson import Poisson
from student_t import StudentT
from likelihood import Likelihood
from mixed_noise import MixedNoise
View file
@ -5,6 +5,7 @@ import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from likelihood import Likelihood
from scipy import stats

class Bernoulli(Likelihood):
    """
@ -43,7 +44,7 @@ class Bernoulli(Likelihood):
        Y_prep[Y.flatten() == 0] = -1
        return Y_prep

    def moments_match_ep(self, Y_i, tau_i, v_i):
        """
        Moments match of the marginal approximation in EP algorithm
@ -51,9 +52,9 @@ class Bernoulli(Likelihood):
        :param tau_i: precision of the cavity distribution (float)
        :param v_i: mean/variance of the cavity distribution (float)
        """
        if Y_i == 1:
            sign = 1.
        elif Y_i == 0:
            sign = -1
        else:
            raise ValueError("bad value for Bernoulli observation (0, 1)")
@ -76,7 +77,7 @@ class Bernoulli(Likelihood):
        return Z_hat, mu_hat, sigma2_hat

    def predictive_mean(self, mu, variance, Y_metadata=None):
        if isinstance(self.gp_link, link_functions.Probit):
            return stats.norm.cdf(mu/np.sqrt(1+variance))
@ -87,13 +88,12 @@ class Bernoulli(Likelihood):
        else:
            raise NotImplementedError

    def predictive_variance(self, mu, variance, pred_mean, Y_metadata=None):
        if isinstance(self.gp_link, link_functions.Heaviside):
            return 0.
        else:
            return np.nan

    def pdf_link(self, link_f, y, Y_metadata=None):
        """
@ -212,7 +212,7 @@ class Bernoulli(Likelihood):
        np.seterr(**state)
        return d3logpdf_dlink3

    def samples(self, gp, Y_metadata=None):
        """
        Returns a set of samples of observations based on a given value of the latent variable.
View file
@ -18,6 +18,7 @@ import link_functions
from likelihood import Likelihood
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from scipy import stats

class Gaussian(Likelihood):
    """
@ -49,11 +50,18 @@ class Gaussian(Likelihood):
        if isinstance(gp_link, link_functions.Identity):
            self.log_concave = True

    def betaY(self, Y, Y_metadata=None):
        #TODO: ~Ricardo this does not live here
        return Y/self.gaussian_variance(Y_metadata)

    def gaussian_variance(self, Y_metadata=None):
        return self.variance

    def update_gradients(self, grad):
        self.variance.gradient = grad

    def exact_inference_gradients(self, dL_dKdiag, Y_metadata=None):
        return dL_dKdiag.sum()

    def _preprocess_values(self, Y):
        """
@ -76,16 +84,12 @@ class Gaussian(Likelihood):
        Z_hat = 1./np.sqrt(2.*np.pi*sum_var)*np.exp(-.5*(data_i - v_i/tau_i)**2./sum_var)
        return Z_hat, mu_hat, sigma2_hat

    def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
        if full_cov:
            var += np.eye(var.shape[0])*self.variance
        else:
            var += self.variance
        return mu, var

    def predictive_mean(self, mu, sigma):
        return mu
@ -93,7 +97,14 @@ class Gaussian(Likelihood):
    def predictive_variance(self, mu, sigma, predictive_mean=None):
        return self.variance + sigma**2
    def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
        return [stats.norm.ppf(q/100.)*np.sqrt(var) + mu for q in quantiles]

    def pdf_link(self, link_f, y, Y_metadata=None):
""" """
Likelihood function given link(f) Likelihood function given link(f)
@ -292,7 +303,7 @@ class Gaussian(Likelihood):
""" """
return self.variance return self.variance
def samples(self, gp): def samples(self, gp, Y_metadata=None):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.
@ -300,6 +311,8 @@ class Gaussian(Likelihood):
""" """
orig_shape = gp.shape orig_shape = gp.shape
gp = gp.flatten() gp = gp.flatten()
#orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp]) Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
return Ysim.reshape(orig_shape) return Ysim.reshape(orig_shape)
View file
@ -58,6 +58,18 @@ class Likelihood(Parameterized):
        """
        return Y
    def conditional_mean(self, gp):
        """
        The mean of the random variable conditioned on one value of the GP
        """
        raise NotImplementedError

    def conditional_variance(self, gp):
        """
        The variance of the random variable conditioned on one value of the GP
        """
        raise NotImplementedError
    def log_predictive_density(self, y_test, mu_star, var_star):
        """
        Calculation of the log predictive density
@ -120,7 +132,7 @@ class Likelihood(Parameterized):
        return z, mean, variance

    def predictive_mean(self, mu, variance, Y_metadata=None):
        """
        Quadrature calculation of the predictive mean: E(Y_star|Y) = E( E(Y_star|f_star, Y) )
@ -128,8 +140,14 @@ class Likelihood(Parameterized):
        :param sigma: standard deviation of posterior
        """
        #conditional_mean: the expected value of y given some f, under this likelihood
        def int_mean(f, m, v):
            p = np.exp(-(0.5/v)*np.square(f - m))
            #If p is zero then conditional_mean will overflow
            if p < 1e-10:
                return 0.
            else:
                return self.conditional_mean(f)*p
        scaled_mean = [quad(int_mean, -np.inf, np.inf, args=(mj, s2j))[0] for mj, s2j in zip(mu, variance)]
        mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
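The predictive mean is the Gaussian-weighted average of the conditional mean, E(Y*|Y) = integral of E(Y*|f) N(f; m, v) df, computed above by quadrature. A standalone sketch with a Poisson-style conditional mean exp(f) (SciPy; the numbers are hypothetical), for which the integral has the known closed form exp(m + v/2):

import numpy as np
from scipy.integrate import quad

m, v = 0.3, 0.5   # posterior mean and variance of f at one test point

def integrand(f):
    # conditional mean exp(f), weighted by the (unnormalized) Gaussian over f
    return np.exp(f) * np.exp(-(0.5/v)*np.square(f - m))

pred_mean = quad(integrand, -np.inf, np.inf)[0] / np.sqrt(2*np.pi*v)
print(pred_mean, np.exp(m + v/2.))  # both ~1.7333: quadrature matches the closed form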
@ -139,7 +157,7 @@ class Likelihood(Parameterized):
"""Quadrature calculation of the conditional mean: E(Y_star|f)""" """Quadrature calculation of the conditional mean: E(Y_star|f)"""
raise NotImplementedError, "implement this function to make predictions" raise NotImplementedError, "implement this function to make predictions"
def _predictive_variance(self,mu,variance,predictive_mean=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
""" """
Numerical approximation to the predictive variance: V(Y_star) Numerical approximation to the predictive variance: V(Y_star)
@ -156,7 +174,12 @@ class Likelihood(Parameterized):
# E( V(Y_star|f_star) ) # E( V(Y_star|f_star) )
def int_var(f,m,v): def int_var(f,m,v):
return self._variance(f)*np.exp(-(0.5/v)*np.square(f - m)) p = np.exp(-(0.5/v)*np.square(f - m))
#If p is zero then conditional_variance will overflow
if p < 1e-10:
return 0.
else:
return self.conditional_variance(f)*p
scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
@ -169,13 +192,20 @@ class Likelihood(Parameterized):
#E( E(Y_star|f_star)**2 ) #E( E(Y_star|f_star)**2 )
def int_pred_mean_sq(f,m,v,predictive_mean_sq): def int_pred_mean_sq(f,m,v,predictive_mean_sq):
return self._mean(f)**2*np.exp(-(0.5/v)*np.square(f - m)) p = np.exp(-(0.5/v)*np.square(f - m))
#If p is zero then conditional_mean**2 will overflow
if p < 1e-10:
return 0.
else:
return self.conditional_mean(f)**2*p
scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)] scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
var_exp = exp_exp2 - predictive_mean_sq var_exp = exp_exp2 - predictive_mean_sq
# V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) # V(Y_star) = E[ V(Y_star|f_star) ] + V[ E(Y_star|f_star) ]
# V(Y_star) = E[ V(Y_star|f_star) ] + E[ E(Y_star|f_star)**2 ] - E[ E(Y_star|f_star) ]**2
return exp_var + var_exp return exp_var + var_exp
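The three quadrature integrals above assemble V(Y_star) by the law of total variance, integrating the conditional moments against the Gaussian posterior over f_star. A minimal self-contained sketch of the same computation, assuming a Poisson conditional likelihood with exponential link (the values of m and v are hypothetical):

import numpy as np
from scipy.integrate import quad

cond_mean = np.exp  # E[Y*|f] under the assumed Poisson likelihood with exp link
cond_var = np.exp   # V[Y*|f]: for Poisson the conditional variance equals the mean

m, v = 0.5, 0.3     # hypothetical posterior mean and variance of f_star
gauss = lambda f: np.exp(-0.5 * (f - m)**2 / v) / np.sqrt(2 * np.pi * v)

pred_mean = quad(lambda f: cond_mean(f) * gauss(f), -np.inf, np.inf)[0]
exp_var = quad(lambda f: cond_var(f) * gauss(f), -np.inf, np.inf)[0]
exp_mean2 = quad(lambda f: cond_mean(f)**2 * gauss(f), -np.inf, np.inf)[0]

# V(Y*) = E[V(Y*|f*)] + E[E(Y*|f*)**2] - E[E(Y*|f*)]**2
pred_var = exp_var + exp_mean2 - pred_mean**2
print(pred_mean)    # analytically exp(m + v/2) here, approx 1.916
print(pred_var)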
def pdf_link(self, link_f, y, Y_metadata=None): def pdf_link(self, link_f, y, Y_metadata=None):
@ -362,18 +392,33 @@ class Likelihood(Parameterized):
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
def predictive_values(self, mu, var): def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
""" """
Compute mean, variance of the predictive distribution. Compute mean, variance of the predictive distribution.
:param mu: mean of the latent variable, f, of posterior :param mu: mean of the latent variable, f, of posterior
:param var: variance of the latent variable, f, of posterior :param var: variance of the latent variable, f, of posterior
:param full_cov: whether to use the full covariance or just the diagonal
:type full_cov: Boolean
""" """
pred_mean = self.predictive_mean(mu, var)
pred_var = self.predictive_variance(mu, var, pred_mean) pred_mean = self.predictive_mean(mu, var, Y_metadata)
pred_var = self.predictive_variance(mu, var, pred_mean, Y_metadata)
return pred_mean, pred_var return pred_mean, pred_var
def samples(self, gp): def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
#compute the quantiles by sampling!!!
N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y, q, axis=1)[:,None] for q in quantiles]
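predictive_quantiles above approximates the quantiles by Monte Carlo: draw f_star from its Gaussian posterior, push the draws through the likelihood's sampler, and take percentiles of the resulting observations. A standalone sketch, assuming the likelihood samples like a Poisson with exponential link (all values hypothetical):

import numpy as np

np.random.seed(0)
mu = np.array([[0.0], [1.0]])    # hypothetical posterior means of f_star
var = np.array([[0.1], [0.2]])   # hypothetical posterior variances
N_samp = 1000

f_samples = np.random.randn(mu.shape[0], N_samp) * np.sqrt(var) + mu
y_samples = np.random.poisson(np.exp(f_samples))  # stands in for self.samples(...)

quantiles = (2.5, 97.5)
bounds = [np.percentile(y_samples, q, axis=1)[:, None] for q in quantiles]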
def samples(self, gp, Y_metadata=None):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -6,6 +6,9 @@ from scipy import stats
import scipy as sp import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
_exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)
class GPTransformation(object): class GPTransformation(object):
""" """
Link function class for doing non-Gaussian likelihoods approximation Link function class for doing non-Gaussian likelihoods approximation
@ -92,16 +95,16 @@ class Log(GPTransformation):
""" """
def transf(self,f): def transf(self,f):
return np.exp(f) return np.exp(np.clip(f, -_lim_val, _lim_val))
def dtransf_df(self,f): def dtransf_df(self,f):
return np.exp(f) return np.exp(np.clip(f, -_lim_val, _lim_val))
def d2transf_df2(self,f): def d2transf_df2(self,f):
return np.exp(f) return np.exp(np.clip(f, -_lim_val, _lim_val))
def d3transf_df3(self,f): def d3transf_df3(self,f):
return np.exp(f) return np.exp(np.clip(f, -_lim_val, _lim_val))
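Clipping the argument at _lim_val = log(float64 max) keeps the link function and all of its derivatives finite: the output saturates at the largest representable float instead of overflowing to inf. A quick demonstration with hypothetical inputs:

import numpy as np

_exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)  # about 709.78 for float64

f = np.array([-1000., 0., 1000.])
# np.exp(f) would overflow to inf (with a RuntimeWarning) at f = 1000
print(np.exp(np.clip(f, -_lim_val, _lim_val)))  # saturates at float64 max instead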
class Log_ex_1(GPTransformation): class Log_ex_1(GPTransformation):
""" """

View file

@ -0,0 +1,87 @@
import numpy as np
from scipy import stats, special
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from likelihood import Likelihood
from gaussian import Gaussian
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from ..core.parameterization import Parameterized
import itertools
class MixedNoise(Likelihood):
def __init__(self, likelihoods_list, name='mixed_noise'):
super(Likelihood, self).__init__(name=name)
self.add_parameters(*likelihoods_list)
self.likelihoods_list = likelihoods_list
self.log_concave = False
def gaussian_variance(self, Y_metadata):
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
ind = Y_metadata['output_index'].flatten()
variance = np.zeros(ind.size)
for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
variance[ind==j] = lik.variance
return variance[:,None]
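gaussian_variance scatters each output's noise variance across the data according to Y_metadata['output_index']. A pure-numpy sketch with hypothetical variances:

import numpy as np

variances = [0.1, 0.5, 2.0]          # one (hypothetical) Gaussian variance per output
ind = np.array([0, 0, 1, 2, 2, 1])   # Y_metadata['output_index'].flatten()

variance = np.zeros(ind.size)
for j, v in enumerate(variances):
    variance[ind == j] = v
print(variance[:, None])             # column vector, one noise variance per datum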
def betaY(self,Y,Y_metadata):
return Y/self.gaussian_variance(Y_metadata=Y_metadata)
def update_gradients(self, gradients):
self.gradient = gradients
def exact_inference_gradients(self, dL_dKdiag, Y_metadata):
assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
ind = Y_metadata['output_index'].flatten()
return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
if all([isinstance(l, Gaussian) for l in self.likelihoods_list]):
ind = Y_metadata['output_index'].flatten()
_variance = np.array([self.likelihoods_list[j].variance for j in ind ])
if full_cov:
var += np.eye(var.shape[0])*_variance
else:
var += _variance
return mu, var
else:
raise NotImplementedError
def predictive_variance(self, mu, sigma, predictive_mean=None, Y_metadata=None):
#look up each datum's Gaussian noise variance from its output index
_variance = self.gaussian_variance(Y_metadata=Y_metadata)
return _variance + sigma**2
def covariance_matrix(self, Y, Y_metadata):
#assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
#ind = Y_metadata['output_index'].flatten()
#variance = np.zeros(Y.shape[0])
#for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
# variance[ind==j] = lik.variance
#return np.diag(variance)
return np.diag(self.gaussian_variance(Y_metadata).flatten())
def samples(self, gp, Y_metadata):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
N1, N2 = gp.shape
Ysim = np.zeros((N1,N2))
ind = Y_metadata['output_index'].flatten()
for j in np.unique(ind):
flt = ind==j
gp_filtered = gp[flt,:]
n1 = gp_filtered.shape[0]
lik = self.likelihoods_list[j]
_ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()])
Ysim[flt,:] = _ysim.reshape(n1,N2)
return Ysim
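The loop above draws each output's observations with that output's own noise variance. A standalone sketch of the same per-output sampling, assuming an identity link and hypothetical variances:

import numpy as np

np.random.seed(1)
gp = np.random.randn(6, 1)          # latent function values
ind = np.array([0, 0, 1, 1, 2, 2])  # output index per row
noise_vars = [0.01, 0.25, 1.0]      # hypothetical per-output noise variances

Ysim = np.zeros_like(gp)
for j in np.unique(ind):
    flt = ind == j
    Ysim[flt] = gp[flt] + np.random.randn(flt.sum(), 1) * np.sqrt(noise_vars[j])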

View file

@ -21,7 +21,7 @@ class Poisson(Likelihood):
""" """
def __init__(self, gp_link=None): def __init__(self, gp_link=None):
if gp_link is None: if gp_link is None:
gp_link = link_functions.Log_ex_1() gp_link = link_functions.Log()
super(Poisson, self).__init__(gp_link, name='Poisson') super(Poisson, self).__init__(gp_link, name='Poisson')
@ -134,7 +134,19 @@ class Poisson(Likelihood):
d3lik_dlink3 = 2*y/(link_f)**3 d3lik_dlink3 = 2*y/(link_f)**3
return d3lik_dlink3 return d3lik_dlink3
def samples(self, gp): def conditional_mean(self,gp):
"""
The mean of the random variable conditioned on one value of the GP
"""
return self.gp_link.transf(gp)
def conditional_variance(self,gp):
"""
The variance of the random variable conditioned on one value of the GP
"""
return self.gp_link.transf(gp)
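For a Poisson likelihood the conditional mean and variance coincide at the rate gp_link.transf(f); with the Log link both equal exp(f). A quick Monte-Carlo sanity check (values hypothetical):

import numpy as np

np.random.seed(2)
rate = np.exp(0.7)                       # gp_link.transf(f) for f = 0.7 under the Log link
y = np.random.poisson(rate, size=100000)
print(abs(y.mean() - rate))              # both sample moments should be close to the rate
print(abs(y.var() - rate))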
def samples(self, gp, Y_metadata=None):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -9,6 +9,7 @@ from scipy import stats, integrate
from scipy.special import gammaln, gamma from scipy.special import gammaln, gamma
from likelihood import Likelihood from likelihood import Likelihood
from ..core.parameterization import Param from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
class StudentT(Likelihood): class StudentT(Likelihood):
""" """
@ -26,7 +27,7 @@ class StudentT(Likelihood):
super(StudentT, self).__init__(gp_link, name='Student_T') super(StudentT, self).__init__(gp_link, name='Student_T')
self.sigma2 = Param('t_noise', float(sigma2)) self.sigma2 = Param('t_noise', float(sigma2), Logexp())
self.v = Param('deg_free', float(deg_free)) self.v = Param('deg_free', float(deg_free))
self.add_parameter(self.sigma2) self.add_parameter(self.sigma2)
self.add_parameter(self.v) self.add_parameter(self.v)
@ -37,7 +38,7 @@ class StudentT(Likelihood):
def parameters_changed(self): def parameters_changed(self):
self.variance = (self.v / float(self.v - 2)) * self.sigma2 self.variance = (self.v / float(self.v - 2)) * self.sigma2
def update_gradients(self, derivatives): def update_gradients(self, grads):
""" """
Pull out the gradients, be careful as the order must match the order Pull out the gradients, be careful as the order must match the order
in which the parameters are added in which the parameters are added
@ -244,33 +245,33 @@ class StudentT(Likelihood):
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_variance(self, mu, sigma, predictive_mean=None): def predictive_mean(self, mu, sigma, Y_metadata=None):
""" """
Compute predictive variance of student_t*normal p(y*|f*)p(f*) Compute mean of the prediction
Need to find what the variance is at the latent points for a student t*normal p(y*|f*)p(f*)
(((g((v+1)/2))/(g(v/2)*s*sqrt(v*pi)))*(1+(1/v)*((y-f)/s)^2)^(-(v+1)/2))
*((1/(s*sqrt(2*pi)))*exp(-(1/(2*(s^2)))*((y-f)^2)))
""" """
return self.gp_link.transf(mu) # only true if the link is monotonic, which it is.
#FIXME: Not correct def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
#We want the variance around test points y which comes from int p(y*|f*)p(f*) df* if self.deg_free < 2.:
#Var(y*) = Var(E[y*|f*]) + E[Var(y*|f*)] return np.empty(mu.shape)*np.nan #not defined for small degrees of freedom
#Since we are given f* (mu) which is our mean (expected) value of y*|f* then the variance is the variance around this else:
#Which was also given to us as (var) return super(StudentT, self).predictive_variance(mu, variance, predictive_mean, Y_metadata)
#We also need to know the expected variance of y* around samples f*, this is the variance of the student t distribution
#However the variance of the student t distribution is not dependent on f, only on sigma and the degrees of freedom
true_var = 1/(1/sigma**2 + 1/self.variance)
return true_var def conditional_mean(self, gp):
return self.gp_link.transf(gp)
<<<<<<< HEAD
def predictive_mean(self, mu, sigma): def predictive_mean(self, mu, sigma):
""" """
Compute mean of the prediction Compute mean of the prediction
""" """
return mu return mu
=======
def conditional_variance(self, gp):
return self.deg_free/(self.deg_free - 2.)
>>>>>>> a3287c38ea775155df4e90f7fe1883d12ffb54b9
def samples(self, gp): def samples(self, gp, Y_metadata=None):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -13,6 +13,7 @@ from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM from bayesian_gplvm import BayesianGPLVM
from mrd import MRD from mrd import MRD
from gradient_checker import GradientChecker from gradient_checker import GradientChecker
from gp_multioutput_regression import GPMultioutputRegression
from sparse_gp_multioutput_regression import SparseGPMultioutputRegression
from ss_gplvm import SSGPLVM from ss_gplvm import SSGPLVM
from gp_coregionalized_regression import GPCoregionalizedRegression
from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
#.py file not included!!! #from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression

View file

@ -66,7 +66,7 @@ class BayesianGPLVM(SparseGP):
super(BayesianGPLVM, self).parameters_changed() super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict) self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)

View file

@ -23,7 +23,7 @@ class GPClassification(GP):
def __init__(self, X, Y, kernel=None): def __init__(self, X, Y, kernel=None):
if kernel is None: if kernel is None:
kernel = kern.rbf(X.shape[1]) kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Bernoulli() likelihood = likelihoods.Bernoulli()

View file

@ -0,0 +1,44 @@
# Copyright (c) 2012 - 2014 the GPy Authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
from .. import util
class GPCoregionalizedRegression(GP):
"""
Gaussian Process model for heteroscedastic multioutput regression
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X_list: list of input observations corresponding to each output
:type X_list: list of numpy arrays
:param Y_list: list of observed values related to the different noise models
:type Y_list: list of numpy arrays
:param kernel: a GPy kernel, defaults to RBF ** Coregionalized
:type kernel: None | GPy.kernel defaults
:likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
:type likelihoods_list: None | a list GPy.likelihoods
:param name: model name
:type name: string
:param W_rank: rank of the coregionalization matrix 'W' (see coregionalize kernel documentation)
:type W_rank: integer
:param kernel_name: name of the kernel
:type kernel_name: string
"""
def __init__(self, X_list, Y_list, kernel=None, likelihoods_list=None, name='GPCR',W_rank=1,kernel_name='X'):
#Input and Output
X,Y,self.output_index = util.multioutput.build_XY(X_list,Y_list)
Ny = len(Y_list)
#Kernel
if kernel is None:
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=W_rank, name=kernel_name)
#Likelihood
likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)
super(GPCoregionalizedRegression, self).__init__(X,Y,kernel,likelihood, Y_metadata={'output_index':self.output_index})
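The wrapper works because all outputs are stacked into a single design matrix whose last column indexes the output, which is why the kernel only sees input_dim = X.shape[1]-1. A pure-numpy sketch of that stacking, assuming it mirrors what a build_XY-style helper does:

import numpy as np

X_list = [np.random.rand(4, 1), np.random.rand(3, 1)]  # hypothetical inputs per output
Y_list = [np.random.rand(4, 1), np.random.rand(3, 1)]  # hypothetical targets per output

# append an output-index column to each X block, then stack everything
X = np.vstack([np.hstack([x, j * np.ones((x.shape[0], 1))])
               for j, x in enumerate(X_list)])
Y = np.vstack(Y_list)
output_index = X[:, -1:].astype(int)  # what Y_metadata['output_index'] holds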

View file

@ -20,14 +20,14 @@ class GPRegression(GP):
""" """
def __init__(self, X, Y, kernel=None): def __init__(self, X, Y, kernel=None, Y_metadata=None):
if kernel is None: if kernel is None:
kernel = kern.RBF(X.shape[1]) kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian() likelihood = likelihoods.Gaussian()
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression') super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)
def _getstate(self): def _getstate(self):
return GP._getstate(self) return GP._getstate(self)

View file

@ -41,7 +41,7 @@ class GPLVM(GP):
def parameters_changed(self): def parameters_changed(self):
super(GPLVM, self).parameters_changed() super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self.dL_dK, self.X, None) self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
def _getstate(self): def _getstate(self):
return GP._getstate(self) return GP._getstate(self)

View file

@ -5,15 +5,15 @@ import numpy as np
import itertools import itertools
import pylab import pylab
from ..core import Model, SparseGP from ..core import Model
from ..util.linalg import PCA from ..util.linalg import PCA
from ..kern import Kern from ..kern import Kern
from bayesian_gplvm import BayesianGPLVM
from ..core.parameterization.variational import NormalPosterior, NormalPrior from ..core.parameterization.variational import NormalPosterior, NormalPrior
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData from ..core.parameterization import Param, Parameterized
from ..likelihoods.gaussian import Gaussian from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
from ..likelihoods import Gaussian
class MRD2(Model): class MRD(Model):
""" """
Apply MRD to all given datasets Y in Ylist. Apply MRD to all given datasets Y in Ylist.
@ -43,61 +43,109 @@ class MRD2(Model):
:param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use :param :class:`~GPy.inference.latent_function_inference inference_method: the inference method to use
:param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use :param :class:`~GPy.likelihoods.likelihood.Likelihood` likelihood: the likelihood to use
:param str name: the name of this model :param str name: the name of this model
:param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None
""" """
def __init__(self, Ylist, input_dim, X=None, X_variance=None, def __init__(self, Ylist, input_dim, X=None, X_variance=None,
initx = 'PCA', initz = 'permute', initx = 'PCA', initz = 'permute',
num_inducing=10, Z=None, kernel=None, num_inducing=10, Z=None, kernel=None,
inference_method=None, likelihood=None, name='mrd'): inference_method=None, likelihood=None, name='mrd', Ynames=None):
super(MRD2, self).__init__(name) super(MRD, self).__init__(name)
# sort out the kernels # sort out the kernels
if kernel is None: if kernel is None:
from ..kern import RBF from ..kern import RBF
self.kern = [RBF(input_dim, ARD=1, name='Y_{}'.format(i)) for i in range(len(Ylist))] self.kern = [RBF(input_dim, ARD=1, name='rbf') for i in range(len(Ylist))]
elif isinstance(kernel, Kern): elif isinstance(kernel, Kern):
self.kern = [kernel.copy(name='Y_{}'.format(i)) for i in range(len(Ylist))] self.kern = [kernel.copy(name='{}_{}'.format(kernel.name, i)) for i in range(len(Ylist))]
else: else:
assert len(kernel) == len(Ylist), "need one kernel per output" assert len(kernel) == len(Ylist), "need one kernel per output"
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!" assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
self.kern = kernel
self.input_dim = input_dim self.input_dim = input_dim
self.num_inducing = num_inducing self.num_inducing = num_inducing
self.Ylist = Ylist
self._in_init_ = True self._in_init_ = True
X = self._init_X(initx, Ylist) X = self._init_X(initx, Ylist)
self.Z = self._init_Z(initz, X) self.Z = Param('inducing inputs', self._init_Z(initz, X))
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
if X_variance is None: if X_variance is None:
X_variance = np.random.uniform(0,.2,X.shape) X_variance = np.random.uniform(0, .2, X.shape)
self.variational_prior = NormalPrior() self.variational_prior = NormalPrior()
self.X = NormalPosterior(X, X_variance) self.X = NormalPosterior(X, X_variance)
if likelihood is None: if likelihood is None:
likelihood = Gaussian() self.likelihood = [Gaussian(name='Gaussian_noise') for i in range(len(Ylist))]
else: self.likelihood = likelihood
if inference_method is None: if inference_method is None:
if any(np.any(np.isnan(y)) for y in Ylist): self.inference_method= []
self.inference_method = VarDTCMissingData(limit=len(Ylist)) for y in Ylist:
if np.any(np.isnan(y)):
self.inference_method.append(VarDTCMissingData(limit=1))
else:
self.inference_method.append(VarDTC(limit=1))
else:
self.inference_method = inference_method
self.inference_method.set_limit(len(Ylist))
self.Ylist = Ylist self.add_parameters(self.X, self.Z)
if Ynames is None:
Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
for i, n, k, l in itertools.izip(itertools.count(), Ynames, self.kern, self.likelihood):
p = Parameterized(name=n)
p.add_parameter(k)
p.add_parameter(l)
setattr(self, 'Y{}'.format(i), p)
self.add_parameter(p)
self._in_init_ = False
def parameters_changed(self): def parameters_changed(self):
for y in self.Ylist: self._log_marginal_likelihood = 0
pass self.posteriors = []
self.Z.gradient = 0.
self.X.mean.gradient = 0.
self.X.variance.gradient = 0.
def _init_X(self, init='PCA', likelihood_list=None): for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
if likelihood_list is None: posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
likelihood_list = self.likelihood_list
Ylist = [] self.posteriors.append(posterior)
for likelihood_or_Y in likelihood_list: self._log_marginal_likelihood += lml
if type(likelihood_or_Y) is np.ndarray:
Ylist.append(likelihood_or_Y) # likelihood gradients
else: l.update_gradients(grad_dict.pop('dL_dthetaL'))
Ylist.append(likelihood_or_Y.Y)
del likelihood_list #gradients wrt kernel
dL_dKmm = grad_dict.pop('dL_dKmm')
k.update_gradients_full(dL_dKmm, self.Z, None)
target = k.gradient.copy()
k.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
k.gradient += target
#gradients wrt Z
self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += k.gradients_Z_expectations(
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
self.X.mean.gradient += dL_dmean
self.X.variance.gradient += dL_dS
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
def log_likelihood(self):
return self._log_marginal_likelihood
def _init_X(self, init='PCA', Ylist=None):
if Ylist is None:
Ylist = self.Ylist
if init in "PCA_concat": if init in "PCA_concat":
X = PCA(np.hstack(Ylist), self.input_dim)[0] X = PCA(np.hstack(Ylist), self.input_dim)[0]
elif init in "PCA_single": elif init in "PCA_single":
@ -106,7 +154,6 @@ class MRD2(Model):
X[:, qs] = PCA(Y, len(qs))[0] X[:, qs] = PCA(Y, len(qs))[0]
else: # init == 'random': else: # init == 'random':
X = np.random.randn(Ylist[0].shape[0], self.input_dim) X = np.random.randn(Ylist[0].shape[0], self.input_dim)
self.X = X
return X return X
def _init_Z(self, init="permute", X=None): def _init_Z(self, init="permute", X=None):
@ -116,259 +163,8 @@ class MRD2(Model):
Z = np.random.permutation(X.copy())[:self.num_inducing] Z = np.random.permutation(X.copy())[:self.num_inducing]
elif init in "random": elif init in "random":
Z = np.random.randn(self.num_inducing, self.input_dim) * X.var() Z = np.random.randn(self.num_inducing, self.input_dim) * X.var()
self.Z = Z
return Z return Z
class MRD(Model):
"""
Do MRD on given Datasets in Ylist.
All Ys in likelihood_list are in [N x Dn], where Dn can be different per Yn,
N must be shared across datasets though.
:param likelihood_list: list of observed datasets (:py:class:`~GPy.likelihoods.gaussian.Gaussian` if not supplied directly)
:type likelihood_list: [:py:class:`~GPy.likelihoods.likelihood.likelihood` | :py:class:`ndarray`]
:param names: names for different gplvm models
:type names: [str]
:param input_dim: latent dimensionality
:type input_dim: int
:param initx: initialisation method for the latent space :
* 'concat' - PCA on concatenation of all datasets
* 'single' - Concatenation of PCA on datasets, respectively
* 'random' - Random draw from a normal
:type initx: ['concat'|'single'|'random']
:param initz: initialisation method for inducing inputs
:type initz: 'permute'|'random'
:param X: Initial latent space
:param X_variance: Initial latent space variance
:param Z: initial inducing inputs
:param num_inducing: number of inducing inputs to use
:param kernels: list of kernels or kernel shared for all BGPLVMS
:type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default)
"""
def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None,
kernels=None, initx='PCA',
initz='permute', _debug=False, **kw):
if names is None:
self.names = ["{}".format(i) for i in range(len(likelihood_or_Y_list))]
# sort out the kernels
if kernels is None:
kernels = [None] * len(likelihood_or_Y_list)
elif isinstance(kernels, Kern):
kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
else:
assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!"
assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim
self._debug = _debug
self.num_inducing = num_inducing
self._in_init_ = True
X = self._init_X(initx, likelihood_or_Y_list)
Z = self._init_Z(initz, X)
self.num_inducing = Z.shape[0] # ensure M==N if M>N
self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, num_inducing=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
del self._in_init_
self.gref = self.bgplvms[0]
nparams = np.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum()
self.num_data = self.gref.num_data
self.NQ = self.num_data * self.input_dim
self.MQ = self.num_inducing * self.input_dim
Model.__init__(self)
self.ensure_default_constraints()
def _getstate(self):
return Model._getstate(self) + [self.names,
self.bgplvms,
self.gref,
self.nparams,
self.input_dim,
self.num_inducing,
self.num_data,
self.NQ,
self.MQ]
def _setstate(self, state):
self.MQ = state.pop()
self.NQ = state.pop()
self.num_data = state.pop()
self.num_inducing = state.pop()
self.input_dim = state.pop()
self.nparams = state.pop()
self.gref = state.pop()
self.bgplvms = state.pop()
self.names = state.pop()
Model._setstate(self, state)
@property
def X(self):
return self.gref.X
@X.setter
def X(self, X):
try:
self.propagate_param(X=X)
except AttributeError:
if not self._in_init_:
raise AttributeError("bgplvm list not initialized")
@property
def Z(self):
return self.gref.Z
@Z.setter
def Z(self, Z):
try:
self.propagate_param(Z=Z)
except AttributeError:
if not self._in_init_:
raise AttributeError("bgplvm list not initialized")
@property
def X_variance(self):
return self.gref.X_variance
@X_variance.setter
def X_variance(self, X_var):
try:
self.propagate_param(X_variance=X_var)
except AttributeError:
if not self._in_init_:
raise AttributeError("bgplvm list not initialized")
@property
def likelihood_list(self):
return [g.likelihood.Y for g in self.bgplvms]
@likelihood_list.setter
def likelihood_list(self, likelihood_list):
for g, Y in itertools.izip(self.bgplvms, likelihood_list):
g.likelihood.Y = Y
@property
def auto_scale_factor(self):
"""
set auto_scale_factor for all gplvms
:param b: auto_scale_factor
:type b:
"""
return self.gref.auto_scale_factor
@auto_scale_factor.setter
def auto_scale_factor(self, b):
self.propagate_param(auto_scale_factor=b)
def propagate_param(self, **kwargs):
for key, val in kwargs.iteritems():
for g in self.bgplvms:
g.__setattr__(key, val)
def randomize(self, initx='concat', initz='permute', *args, **kw):
super(MRD, self).randomize(*args, **kw)
self._init_X(initx, self.likelihood_list)
self._init_Z(initz, self.X)
#def _get_latent_param_names(self):
def _get_param_names(self):
n1 = self.gref._get_param_names()
n1var = n1[:self.NQ * 2 + self.MQ]
# return n1var
#
#def _get_kernel_names(self):
map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x),
itertools.izip(ns,
itertools.repeat(name)))
return list(itertools.chain(n1var, *(map_names(\
SparseGP._get_param_names(g)[self.MQ:], n) \
for g, n in zip(self.bgplvms, self.names))))
# kernel_names = (map_names(SparseGP._get_param_names(g)[self.MQ:], n) for g, n in zip(self.bgplvms, self.names))
# return kernel_names
#def _get_param_names(self):
# X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
# S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
# n1var = self._get_latent_param_names()
# kernel_names = self._get_kernel_names()
# return list(itertools.chain(n1var, *kernel_names))
#def _get_print_names(self):
# return list(itertools.chain(*self._get_kernel_names()))
def _get_params(self):
"""
return parameter list containing private and shared parameters as follows:
=================================================================
| mu | S | Z || theta1 | theta2 | .. | thetaN |
=================================================================
"""
X = self.gref.X.ravel()
X_var = self.gref.X_variance.ravel()
Z = self.gref.Z.ravel()
thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms]
params = np.hstack([X, X_var, Z, np.hstack(thetas)])
return params
# def _set_var_params(self, g, X, X_var, Z):
# g.X = X.reshape(self.num_data, self.input_dim)
# g.X_variance = X_var.reshape(self.num_data, self.input_dim)
# g.Z = Z.reshape(self.num_inducing, self.input_dim)
#
# def _set_kern_params(self, g, p):
# g.kern._set_params(p[:g.kern.num_params])
# g.likelihood._set_params(p[g.kern.num_params:])
def _set_params(self, x):
start = 0; end = self.NQ
X = x[start:end]
start = end; end += start
X_var = x[start:end]
start = end; end += self.MQ
Z = x[start:end]
thetas = x[end:]
# set params for all:
for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]):
g._set_params(np.hstack([X, X_var, Z, thetas[s:e]]))
# self._set_var_params(g, X, X_var, Z)
# self._set_kern_params(g, thetas[s:e].copy())
# g._compute_kernel_matrices()
# if self.auto_scale_factor:
# g.scale_factor = np.sqrt(g.psi2.sum(0).mean() * g.likelihood.precision)
# # self.scale_factor = np.sqrt(self.psi2.sum(0).mean() * self.likelihood.precision)
# g._computations()
def update_likelihood_approximation(self): # TODO: object oriented vs script base
for bgplvm in self.bgplvms:
bgplvm.update_likelihood_approximation()
def log_likelihood(self):
ll = -self.gref.KL_divergence()
for g in self.bgplvms:
ll += SparseGP.log_likelihood(g)
return ll
def _log_likelihood_gradients(self):
dLdmu, dLdS = reduce(lambda a, b: [a[0] + b[0], a[1] + b[1]], (g.dL_dmuS() for g in self.bgplvms))
dKLmu, dKLdS = self.gref.dKL_dmuS()
dLdmu -= dKLmu
dLdS -= dKLdS
dLdmuS = np.hstack((dLdmu.flatten(), dLdS.flatten())).flatten()
dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms))
return np.hstack((dLdmuS,
dldzt1,
np.hstack([np.hstack([g.dL_dtheta(),
g.likelihood._gradients(\
partial=g.partial_for_likelihood)]) \
for g in self.bgplvms])))
def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False): def _handle_plotting(self, fignum, axes, plotf, sharex=False, sharey=False):
if axes is None: if axes is None:
fig = pylab.figure(num=fignum) fig = pylab.figure(num=fignum)

View file

@ -0,0 +1,66 @@
# Copyright (c) 2012 - 2014 the GPy Authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SparseGP
from ..inference.latent_function_inference import VarDTC
from .. import likelihoods
from .. import kern
from .. import util
class SparseGPCoregionalizedRegression(SparseGP):
"""
Sparse Gaussian Process model for heteroscedastic multioutput regression
This is a thin wrapper around the SparseGP class, with a set of sensible defaults
:param X_list: list of input observations corresponding to each output
:type X_list: list of numpy arrays
:param Y_list: list of observed values related to the different noise models
:type Y_list: list of numpy arrays
:param Z_list: list of inducing inputs (optional)
:type Z_list: empty list | list of numpy arrays
:param kernel: a GPy kernel, defaults to RBF ** Coregionalized
:type kernel: None | GPy.kernel defaults
:likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
:type likelihoods_list: None | a list GPy.likelihoods
:param num_inducing: number of inducing inputs, defaults to 10 per output (ignored if Z_list is not empty)
:type num_inducing: integer | list of integers
:param name: model name
:type name: string
:param W_rank: rank of the coregionalization matrix 'W' (see coregionalize kernel documentation)
:type W_rank: integer
:param kernel_name: name of the kernel
:type kernel_name: string
"""
def __init__(self, X_list, Y_list, Z_list=[], kernel=None, likelihoods_list=None, num_inducing=10, X_variance=None, name='SGPCR',W_rank=1,kernel_name='X'):
#Input and Output
X,Y,self.output_index = util.multioutput.build_XY(X_list,Y_list)
Ny = len(Y_list)
#Kernel
if kernel is None:
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=W_rank, name=kernel_name)
#Likelihood
likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)
#Inducing inputs list
if len(Z_list):
assert len(Z_list) == Ny, 'Number of outputs does not match length of inducing inputs list.'
else:
if isinstance(num_inducing,np.int):
num_inducing = [num_inducing] * Ny
num_inducing = np.asarray(num_inducing)
assert num_inducing.size == Ny, 'Number of outputs does not match length of inducing inputs list.'
for ni,Xi in zip(num_inducing,X_list):
i = np.random.permutation(Xi.shape[0])[:ni]
Z_list.append(Xi[i].copy())
Z, _, Iz = util.multioutput.build_XY(Z_list)
super(SparseGPCoregionalizedRegression, self).__init__(X, Y, Z, kernel, likelihood, inference_method=VarDTC(), Y_metadata={'output_index':self.output_index})
self['.*inducing'][:,-1].fix()
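When no Z_list is supplied, the inducing inputs default to a random subset of each output's own inputs. A standalone sketch of that selection loop with hypothetical sizes:

import numpy as np

np.random.seed(3)
X_list = [np.random.rand(20, 2), np.random.rand(15, 2)]  # hypothetical inputs per output
num_inducing = [5, 4]                                    # hypothetical per-output counts

Z_list = []
for ni, Xi in zip(num_inducing, X_list):
    i = np.random.permutation(Xi.shape[0])[:ni]  # random rows without replacement
    Z_list.append(Xi[i].copy())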

View file

@ -61,7 +61,7 @@ class SSGPLVM(SparseGP):
super(SSGPLVM, self).parameters_changed() super(SSGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict) self.X.mean.gradient, self.X.variance.gradient, self.X.binary_prob.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)

View file

@ -6,13 +6,15 @@ import numpy as np
import Tango import Tango
from base_plots import gpplot, x_frame1D, x_frame2D from base_plots import gpplot, x_frame1D, x_frame2D
from ...util.misc import param_to_array from ...util.misc import param_to_array
from ...models.gp_coregionalized_regression import GPCoregionalizedRegression
from ...models.sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
def plot_fit(model, plot_limits=None, which_data_rows='all', def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[], which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None, levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False, plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']): linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None):
""" """
Plot the posterior of the GP. Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations. - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -68,7 +70,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#work out what the inputs are for plotting (1D or 2D) #work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs]) fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims) free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
plots = {}
#one dimensional plotting #one dimensional plotting
if len(free_dims) == 1: if len(free_dims) == 1:
@ -84,25 +86,30 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
m, v = model._raw_predict(Xgrid) m, v = model._raw_predict(Xgrid)
lower = m - 2*np.sqrt(v) lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v) upper = m + 2*np.sqrt(v)
Y = Y
else: else:
m, v, lower, upper = model.predict(Xgrid) if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
Y = Y meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
else:
meta = None
m, v = model.predict(Xgrid, full_cov=False, Y_metadata=meta)
lower, upper = model.predict_quantiles(Xgrid, Y_metadata=meta)
for d in which_data_ycols: for d in which_data_ycols:
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol) plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5) plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
#optionally plot some samples #optionally plot some samples
if samples: #NOTE not tested with fixed_inputs if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples) Ysim = model.posterior_samples(Xgrid, samples)
for yi in Ysim.T: for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
#add error bars for uncertain (if input uncertainty is being modelled) #add error bars for uncertain (if input uncertainty is being modelled)
if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs(): if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(), plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5) ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
@ -118,7 +125,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
Zu = Z[:,free_dims] Zu = Z[:,free_dims]
z_height = ax.get_ylim()[0] z_height = ax.get_ylim()[0]
ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12) plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
@ -137,14 +144,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#predict on the frame and plot #predict on the frame and plot
if plot_raw: if plot_raw:
m, _ = model._raw_predict(Xgrid) m, _ = model._raw_predict(Xgrid)
Y = Y
else: else:
m, _, _, _ = model.predict(Xgrid) m, _ = model.predict(Xgrid)
Y = Y
for d in which_data_ycols: for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T m_d = m[:,d].reshape(resolution, resolution).T
ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values #set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim(xmin[0], xmax[0])
@ -157,11 +162,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if hasattr(model,"Z"): if hasattr(model,"Z"):
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims] #Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
Zu = Z[:,free_dims] Zu = Z[:,free_dims]
ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo') plots['inducing_inputs'] = ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
else: else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions" raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
return plots
def plot_fit_f(model, *args, **kwargs): def plot_fit_f(model, *args, **kwargs):
""" """

View file

@ -1,85 +0,0 @@
# Copyright (c) 2012, Nicolo Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
from ..models import BayesianGPLVM
class BGPLVMTests(unittest.TestCase):
def test_bias_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_linear_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.Linear(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_bias_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_line_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.Linear(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_linear_bias_kern(self):
N, num_inducing, input_dim, D = 30, 5, 4, 30
X = np.random.rand(N, input_dim)
k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -17,21 +17,30 @@ class Test(unittest.TestCase):
self.param_index.add(one, [3]) self.param_index.add(one, [3])
self.param_index.add(two, [0,5]) self.param_index.add(two, [0,5])
self.param_index.add(three, [2,4,7]) self.param_index.add(three, [2,4,7])
self.view = ParameterIndexOperationsView(self.param_index, 2, 6)
def test_clear(self):
self.param_index.clear()
self.assertDictEqual(self.param_index._properties, {})
def test_remove(self): def test_remove(self):
self.param_index.remove(three, np.r_[3:10]) self.param_index.remove(three, np.r_[3:10])
self.assertListEqual(self.param_index[three].tolist(), [2]) self.assertListEqual(self.param_index[three].tolist(), [2])
self.param_index.remove(one, [1]) self.param_index.remove(one, [1])
self.assertListEqual(self.param_index[one].tolist(), [3]) self.assertListEqual(self.param_index[one].tolist(), [3])
self.assertListEqual(self.param_index.remove('not in there', []).tolist(), [])
self.param_index.remove(one, [9])
self.assertListEqual(self.param_index[one].tolist(), [3])
self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])
def test_shift_left(self): def test_shift_left(self):
self.param_index.shift_left(1, 2) self.view.shift_left(0, 2)
self.assertListEqual(self.param_index[three].tolist(), [2,5]) self.assertListEqual(self.param_index[three].tolist(), [2,5])
self.assertListEqual(self.param_index[two].tolist(), [0,3]) self.assertListEqual(self.param_index[two].tolist(), [0,3])
self.assertListEqual(self.param_index[one].tolist(), [1]) self.assertListEqual(self.param_index[one].tolist(), [])
def test_shift_right(self): def test_shift_right(self):
self.param_index.shift_right(5, 2) self.view.shift_right(3, 2)
self.assertListEqual(self.param_index[three].tolist(), [2,4,9]) self.assertListEqual(self.param_index[three].tolist(), [2,4,9])
self.assertListEqual(self.param_index[two].tolist(), [0,7]) self.assertListEqual(self.param_index[two].tolist(), [0,7])
self.assertListEqual(self.param_index[one].tolist(), [3]) self.assertListEqual(self.param_index[one].tolist(), [3])
@ -44,17 +53,17 @@ class Test(unittest.TestCase):
# three three three # three three three
# view: [0 1 2 3 4 5 ] # view: [0 1 2 3 4 5 ]
#======================================================================= #=======================================================================
view = ParameterIndexOperationsView(self.param_index, 2, 6) self.view = ParameterIndexOperationsView(self.param_index, 2, 6)
self.assertSetEqual(set(view.properties()), set([one, two, three])) self.assertSetEqual(set(self.view.properties()), set([one, two, three]))
for v,p in zip(view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])): for v,p in zip(self.view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])):
self.assertEqual(v, p) self.assertEqual(v, p)
self.assertSetEqual(set(view[two]), set([3])) self.assertSetEqual(set(self.view[two]), set([3]))
self.assertSetEqual(set(self.param_index[two]), set([0, 5])) self.assertSetEqual(set(self.param_index[two]), set([0, 5]))
view.add(two, np.array([0])) self.view.add(two, np.array([0]))
self.assertSetEqual(set(view[two]), set([0,3])) self.assertSetEqual(set(self.view[two]), set([0,3]))
self.assertSetEqual(set(self.param_index[two]), set([0, 2, 5])) self.assertSetEqual(set(self.param_index[two]), set([0, 2, 5]))
view.clear() self.view.clear()
for v,p in zip(view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])): for v,p in zip(self.view.properties_for(np.r_[:6]), self.param_index.properties_for(np.r_[2:2+6])):
self.assertEqual(v, p) self.assertEqual(v, p)
self.assertEqual(v, []) self.assertEqual(v, [])
param_index = ParameterIndexOperations() param_index = ParameterIndexOperations()
@ -62,11 +71,17 @@ class Test(unittest.TestCase):
param_index.add(two, [0,5]) param_index.add(two, [0,5])
param_index.add(three, [2,4,7]) param_index.add(three, [2,4,7])
view2 = ParameterIndexOperationsView(param_index, 2, 6) view2 = ParameterIndexOperationsView(param_index, 2, 6)
view.update(view2) self.view.update(view2)
for [i,v],[i2,v2] in zip(sorted(param_index.items()), sorted(self.param_index.items())): for [i,v],[i2,v2] in zip(sorted(param_index.items()), sorted(self.param_index.items())):
self.assertEqual(i, i2) self.assertEqual(i, i2)
self.assertTrue(np.all(v == v2)) self.assertTrue(np.all(v == v2))
def test_misc(self):
for k,v in self.param_index.copy()._properties.iteritems():
self.assertListEqual(self.param_index[k].tolist(), v.tolist())
self.assertEqual(self.param_index.size, 6)
self.assertEqual(self.view.size, 5)
if __name__ == "__main__": if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_index_view'] #import sys;sys.argv = ['', 'Test.test_index_view']
unittest.main() unittest.main()

View file

@ -6,7 +6,9 @@ import numpy as np
import GPy import GPy
import sys import sys
verbose = True verbose = 0
class Kern_check_model(GPy.core.Model): class Kern_check_model(GPy.core.Model):
""" """
@ -31,9 +33,10 @@ class Kern_check_model(GPy.core.Model):
self.X2 = X2 self.X2 = X2
self.dL_dK = dL_dK self.dL_dK = dL_dK
def is_positive_definite(self): def is_positive_semi_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0] v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon): if any(v.real<=-1e-10):
print v.real.min()
return False return False
else: else:
return True return True
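The renamed check tolerates eigenvalues that are negative only at round-off scale, since a covariance matrix is positive semi-definite in exact arithmetic but np.linalg.eig can return tiny negative values. A standalone sketch of the same test:

import numpy as np

def is_positive_semi_definite(K, tol=-1e-10):
    # compare against a small negative tolerance rather than exactly zero
    v = np.linalg.eigvals(K)
    return not np.any(v.real <= tol)

A = np.random.randn(5, 5)
K = np.dot(A, A.T)                   # a Gram matrix, PSD by construction
print(is_positive_semi_definite(K))  # True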
@ -87,11 +90,11 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum() return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
def parameters_changed(self): def parameters_changed(self):
self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK, self.X) self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False): def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
""" """
This function runs on kernels to check the correctness of their This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite implementation. It checks that the covariance function is positive definite
@ -106,18 +109,18 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
""" """
pass_checks = True pass_checks = True
if X==None: if X is None:
X = np.random.randn(10, kern.input_dim) X = np.random.randn(10, kern.input_dim)
if output_ind is not None: if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0]) X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
if X2==None: if X2 is None:
X2 = np.random.randn(20, kern.input_dim) X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None: if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0]) X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
if verbose: if verbose:
print("Checking covariance function is positive definite.") print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite() result = Kern_check_model(kern, X=X).is_positive_semi_definite()
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
@ -161,7 +164,10 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
if verbose: if verbose:
print("Checking gradients of K(X, X) wrt X.") print("Checking gradients of K(X, X) wrt X.")
try: try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose) testmodel = Kern_check_dK_dX(kern, X=X, X2=None)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
@ -170,14 +176,17 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
print("Check passed.") print("Check passed.")
if not result: if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True) testmodel.checkgrad(verbose=True)
pass_checks = False pass_checks = False
return False return False
if verbose: if verbose:
print("Checking gradients of K(X, X2) wrt X.") print("Checking gradients of K(X, X2) wrt X.")
try: try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose) testmodel = Kern_check_dK_dX(kern, X=X, X2=X2)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError: except NotImplementedError:
result=True result=True
if verbose: if verbose:
@ -185,8 +194,8 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
if result and verbose: if result and verbose:
print("Check passed.") print("Check passed.")
if not result: if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:") print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True) testmodel.checkgrad(verbose=True)
pass_checks = False pass_checks = False
return False return False
@ -210,27 +219,137 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
class KernelTestsContinuous(unittest.TestCase): class KernelGradientTestsContinuous(unittest.TestCase):
def setUp(self): def setUp(self):
self.X = np.random.randn(100,2) self.N, self.D = 100, 5
self.X2 = np.random.randn(110,2) self.X = np.random.randn(self.N,self.D)
self.X2 = np.random.randn(self.N+10,self.D)
continuous_kerns = ['RBF', 'Linear'] continuous_kerns = ['RBF', 'Linear']
self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns] self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
def test_Matern32(self): def test_Matern32(self):
k = GPy.kern.Matern32(2) k = GPy.kern.Matern32(self.D)
self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose)) k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Prod(self):
k = GPy.kern.Matern32(2, active_dims=[2,3]) * GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Add(self):
k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Matern52(self): def test_Matern52(self):
k = GPy.kern.Matern52(2) k = GPy.kern.Matern52(self.D)
self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose)) k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#TODO: turn off grad checking wrt X for indexed kernels like coregionalize def test_RBF(self):
k = GPy.kern.RBF(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Linear(self):
k = GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#TODO: turn off grad checking wrt X for indexed kernels like coregionalize
# class KernelGradientTestsContinuous1D(unittest.TestCase):
# def setUp(self):
# self.N, self.D = 100, 1
# self.X = np.random.randn(self.N,self.D)
# self.X2 = np.random.randn(self.N+10,self.D)
#
# continuous_kerns = ['RBF', 'Linear']
# self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
#
# def test_PeriodicExponential(self):
# k = GPy.kern.PeriodicExponential(self.D)
# k.randomize()
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#
# def test_PeriodicMatern32(self):
# k = GPy.kern.PeriodicMatern32(self.D)
# k.randomize()
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#
# def test_PeriodicMatern52(self):
# k = GPy.kern.PeriodicMatern52(self.D)
# k.randomize()
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
+class KernelTestsMiscellaneous(unittest.TestCase):
+    def setUp(self):
+        N, D = 100, 10
+        self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.ones(D)
+        self.rbf = GPy.kern.RBF(2, active_dims=slice(0,4,2))
+        self.linear = GPy.kern.Linear(2, active_dims=(3,9))
+        self.matern = GPy.kern.Matern32(3, active_dims=np.array([2,4,9]))
+        self.sumkern = self.rbf + self.linear
+        self.sumkern += self.matern
+        self.sumkern.randomize()
+
+    def test_active_dims(self):
+        self.assertEqual(self.sumkern.input_dim, 10)
+        self.assertEqual(self.sumkern.active_dims, slice(0, 10, 1))
+
+    def test_which_parts(self):
+        self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.matern]), self.linear.K(self.X)+self.matern.K(self.X)))
+        self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.rbf]), self.linear.K(self.X)+self.rbf.K(self.X)))
+        self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=self.sumkern.parts[0]), self.rbf.K(self.X)))
+class KernelTestsNonContinuous(unittest.TestCase):
+    def setUp(self):
+        N0 = 3
+        N1 = 9
+        N2 = 4
+        N = N0+N1+N2
+        self.D = 3
+        self.X = np.random.randn(N, self.D+1)
+        indices = np.random.random_integers(0, 2, size=N)
+        self.X[indices==0, -1] = 0
+        self.X[indices==1, -1] = 1
+        self.X[indices==2, -1] = 2
+        #self.X = self.X[self.X[:, -1].argsort(), :]
+        self.X2 = np.random.randn((N0+N1)*2, self.D+1)
+        self.X2[:(N0*2), -1] = 0
+        self.X2[(N0*2):, -1] = 1
+
+    def test_IndependentOutputs(self):
+        k = GPy.kern.RBF(self.D)
+        kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
+        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
+        k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
+        kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
+        self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
 if __name__ == "__main__":
     print "Running unit tests, please be (very) patient..."
-    unittest.main()
+    #unittest.main()
+
+    np.random.seed(0)
+    N0 = 3
+    N1 = 9
+    N2 = 4
+    N = N0+N1+N2
+    D = 3
+    X = np.random.randn(N, D+1)
+    indices = np.random.random_integers(0, 2, size=N)
+    X[indices==0, -1] = 0
+    X[indices==1, -1] = 1
+    X[indices==2, -1] = 2
+    #X = X[X[:, -1].argsort(), :]
+    X2 = np.random.randn((N0+N1)*2, D+1)
+    X2[:(N0*2), -1] = 0
+    X2[(N0*2):, -1] = 1
+    k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
+    kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
+    assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
+    k = GPy.kern.RBF(D)
+    kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
+    assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
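
The rewritten suite above settles on one pattern for every continuous kernel: draw random inputs, randomize the hyperparameters, then run the gradient checker. A minimal sketch of that pattern outside unittest, assuming check_kernel_gradient_functions is imported from this test module (seed and shapes are illustrative):

    import numpy as np
    import GPy

    np.random.seed(1)
    X, X2 = np.random.randn(100, 5), np.random.randn(110, 5)
    k = GPy.kern.RBF(2, active_dims=[0, 4]) + GPy.kern.Linear(5)
    k.randomize()  # check at a generic point in hyperparameter space
    assert check_kernel_gradient_functions(k, X=X, X2=X2, verbose=True)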

View file

@@ -1,11 +1,11 @@
 import numpy as np
 import unittest
 import GPy
-from ..models import GradientChecker
+from GPy.models import GradientChecker
 import functools
 import inspect
-from ..likelihoods import link_functions
-from ..core.parameterization import Param
+from GPy.likelihoods import link_functions
+from GPy.core.parameterization import Param
 from functools import partial
 #np.random.seed(300)
 #np.random.seed(7)
@@ -541,7 +541,8 @@ class TestNoiseModels(object):
             #import ipdb; ipdb.set_trace()
             #NOTE this test appears to be stochastic for some likelihoods (student t?)
             # appears to all be working in test mode right now...
+            #if isinstance(model, GPy.likelihoods.StudentT):
+            #    import ipdb;ipdb.set_trace()
             assert m.checkgrad(step=step)

###########
@@ -664,12 +665,11 @@ class LaplaceTests(unittest.TestCase):
        print m1
        print m2

-        m2.parameters_changed()
-        #m2._set_params(m1._get_params())
+        m2[:] = m1[:]

        #Predict for training points to get posterior mean and variance
-        post_mean, post_var, _, _ = m1.predict(X)
-        post_mean_approx, post_var_approx, _, _ = m2.predict(X)
+        post_mean, post_var = m1.predict(X)
+        post_mean_approx, post_var_approx, = m2.predict(X)

        if debug:
            import pylab as pb
@@ -701,8 +701,8 @@ class LaplaceTests(unittest.TestCase):
        np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)

        #Check marginals are the same with random
        m1.randomize()
-        #m2._set_params(m1._get_params())
-        m2.parameters_changed()
+        m2[:] = m1[:]
        np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)

        #Check they are checkgradding
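
The edits above replace the old _set_params/parameters_changed synchronisation with slice assignment on the model itself: writing m2[:] = m1[:] copies the flat parameter array across and lets the update machinery fire on its own. A hedged sketch of the idiom (GPRegression and the toy data are placeholders, not from this diff):

    import numpy as np
    import GPy

    X = np.random.rand(20, 1)
    Y = np.sin(X) + 0.05 * np.random.randn(20, 1)
    m1 = GPy.models.GPRegression(X, Y)
    m2 = GPy.models.GPRegression(X, Y)
    m1.randomize()
    m2[:] = m1[:]  # copy parameters; no explicit parameters_changed() call needed
    assert abs(m1.log_likelihood() - m2.log_likelihood()) < 1e-9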

View file

@@ -1,32 +0,0 @@
-# Copyright (c) 2013, Max Zwiessele
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-'''
-Created on 10 Apr 2013
-
-@author: maxz
-'''
-import unittest
-import numpy as np
-import GPy
-
-class MRDTests(unittest.TestCase):
-    def test_gradients(self):
-        num_m = 3
-        N, num_inducing, input_dim, D = 20, 8, 6, 20
-        X = np.random.rand(N, input_dim)
-        k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
-        K = k.K(X)
-        Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)]
-        likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
-        m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing)
-        self.assertTrue(m.checkgrad())
-
-if __name__ == "__main__":
-    print "Running unit tests, please be (very) patient..."
-    unittest.main()

View file

@@ -21,8 +21,6 @@ class ParameterizedTest(Parameterized):
    params_changed_count = _trigger_start

    def parameters_changed(self):
        self.params_changed_count += 1

-    def _set_params(self, params, trigger_parent=True):
-        Parameterized._set_params(self, params, trigger_parent=trigger_parent)

class Test(unittest.TestCase):

View file

@@ -7,8 +7,24 @@ import unittest
 import GPy
 import numpy as np
 from GPy.core.parameterization.parameter_core import HierarchyError
+from GPy.core.parameterization.array_core import ObsAr

-class Test(unittest.TestCase):
+class ArrayCoreTest(unittest.TestCase):
+    def setUp(self):
+        self.X = np.random.normal(1,1, size=(100,10))
+        self.obsX = ObsAr(self.X)
+
+    def test_init(self):
+        X = ObsAr(self.X)
+        X2 = ObsAr(X)
+        self.assertIs(X, X2, "no new Observable array, when Observable is given")
+
+    def test_slice(self):
+        t1 = self.X[2:78]
+        t2 = self.obsX[2:78]
+        self.assertListEqual(t1.tolist(), t2.tolist(), "Slicing should be the exact same, as in ndarray")
+
+class ParameterizedTest(unittest.TestCase):
     def setUp(self):
         self.rbf = GPy.kern.RBF(1)
@@ -18,9 +34,9 @@ class Test(unittest.TestCase):
        self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))

        self.test1 = GPy.core.Parameterized("test model")
-        self.test1.add_parameter(self.white)
-        self.test1.add_parameter(self.rbf, 0)
-        self.test1.add_parameter(self.param)
+        self.test1.kern = self.rbf+self.white
+        self.test1.add_parameter(self.test1.kern)
+        self.test1.add_parameter(self.param, 0)

        x = np.linspace(-2,6,4)[:,None]
        y = np.sin(x)
@@ -29,23 +45,24 @@
    def test_add_parameter(self):
        self.assertEquals(self.rbf._parent_index_, 0)
        self.assertEquals(self.white._parent_index_, 1)
+        self.assertEquals(self.param._parent_index_, 0)
        pass

    def test_fixes(self):
        self.white.fix(warning=False)
-        self.test1.remove_parameter(self.test1.param)
+        self.test1.remove_parameter(self.param)
        self.assertTrue(self.test1._has_fixes())
        from GPy.core.parameterization.transformations import FIXED, UNFIXED
        self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED])
-        self.test1.add_parameter(self.white, 0)
+        self.test1.kern.add_parameter(self.white, 0)
        self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
+        self.test1.kern.rbf.fix()
+        self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)

    def test_remove_parameter(self):
        from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
        self.white.fix()
-        self.test1.remove_parameter(self.white)
+        self.test1.kern.remove_parameter(self.white)
        self.assertIs(self.test1._fixes_,None)
        self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
@@ -66,7 +83,12 @@ class Test(unittest.TestCase):
        self.assertListEqual(self.white._fixes_.tolist(), [FIXED])

        self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
        self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
-        self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
+        self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
+
+    def test_remove_parameter_param_array_grad_array(self):
+        val = self.test1.kern._param_array_.copy()
+        self.test1.kern.remove_parameter(self.white)
+        self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist())

    def test_add_parameter_already_in_hirarchy(self):
        self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
@@ -76,34 +98,46 @@ class Test(unittest.TestCase):
        self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
        self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
        from GPy.core.parameterization.transformations import Logexp
-        kern = self.rbf+self.white
+        kern = self.test1.kern
+        self.test1.remove_parameter(kern)
        self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3))

    def test_constraints(self):
        self.rbf.constrain(GPy.transformations.Square(), False)
-        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(2))
-        self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [2])
-        self.test1.remove_parameter(self.rbf)
+        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
+        self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [self.param.size+self.rbf.size])
+        self.test1.kern.remove_parameter(self.rbf)
        self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), [])

    def test_constraints_views(self):
-        self.assertEqual(self.white.constraints._offset, 2)
-        self.assertEqual(self.rbf.constraints._offset, 0)
-        self.assertEqual(self.param.constraints._offset, 3)
+        self.assertEqual(self.white.constraints._offset, self.param.size+self.rbf.size)
+        self.assertEqual(self.rbf.constraints._offset, self.param.size)
+        self.assertEqual(self.param.constraints._offset, 0)

    def test_fixing_randomize(self):
-        self.white.fix(warning=False)
-        val = float(self.test1.white.variance)
+        self.white.fix(warning=True)
+        val = float(self.white.variance)
        self.test1.randomize()
        self.assertEqual(val, self.white.variance)

+    def test_fixing_randomize_parameter_handling(self):
+        self.rbf.fix(warning=True)
+        val = float(self.rbf.variance)
+        self.test1.kern.randomize()
+        self.assertEqual(val, self.rbf.variance)
+
    def test_fixing_optimize(self):
        self.testmodel.kern.lengthscale.fix()
        val = float(self.testmodel.kern.lengthscale)
        self.testmodel.randomize()
        self.assertEqual(val, self.testmodel.kern.lengthscale)

+    def test_printing(self):
+        print self.test1
+        print self.param
+        print self.test1['']
+
if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.test_add_parameter']
    unittest.main()
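
The new setUp wires the test hierarchy differently: rbf and white now live inside a combined kern, and param is inserted at index 0, which is why the constraint index ranges above are expressed as offsets of param.size. A small sketch of the same construction, using only calls that appear in these tests (the names are placeholders):

    import numpy as np
    import GPy
    from GPy.core.parameterization.param import Param

    m = GPy.core.Parameterized("demo")
    m.kern = GPy.kern.RBF(1) + GPy.kern.White(1)
    m.add_parameter(m.kern)
    m.add_parameter(Param('param', np.random.rand(5, 2)), 0)  # param first, so kern's indices start at param.size
    m.kern.white.variance.fix()
    m.randomize()  # fixed values survive randomize()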

View file

@@ -15,7 +15,7 @@ class PriorTests(unittest.TestCase):
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        lognormal = GPy.priors.LogGaussian(1, 2)
-        m.set_prior('rbf', lognormal)
+        m.rbf.set_prior(lognormal)
        m.randomize()
        self.assertTrue(m.checkgrad())
@@ -28,7 +28,7 @@ class PriorTests(unittest.TestCase):
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        Gamma = GPy.priors.Gamma(1, 1)
-        m.set_prior('rbf', Gamma)
+        m.rbf.set_prior(Gamma)
        m.randomize()
        self.assertTrue(m.checkgrad())
@@ -41,16 +41,9 @@ class PriorTests(unittest.TestCase):
        X, y = X[:, None], y[:, None]
        m = GPy.models.GPRegression(X, y)
        gaussian = GPy.priors.Gaussian(1, 1)
-        success = False
        # setting a Gaussian prior on non-negative parameters
        # should raise an assertionerror.
-        try:
-            m.set_prior('rbf', gaussian)
-        except AssertionError:
-            success = True
-        self.assertTrue(success)
+        self.assertRaises(AssertionError, m.rbf.set_prior, gaussian)

if __name__ == "__main__":
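
Both fixes above move from the string-based m.set_prior('rbf', prior) to calling set_prior on the parameter object itself, which also turns the invalid-prior case into a plain assertRaises. A minimal sketch of the new idiom, mirroring the test bodies:

    import numpy as np
    import GPy

    X = np.random.uniform(-3., 3., (20, 1))
    y = np.sin(X) + 0.05 * np.random.randn(20, 1)
    m = GPy.models.GPRegression(X, y)
    m.rbf.set_prior(GPy.priors.Gamma(1, 1))  # prior attached directly to the rbf part
    m.randomize()
    assert m.checkgrad()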

View file

@@ -12,6 +12,7 @@ import numpy
 from GPy.kern import RBF
 from GPy.kern import Linear
 from copy import deepcopy
+from GPy.core.parameterization.variational import NormalPosterior

 __test__ = lambda: 'deep' in sys.argv
 # np.random.seed(0)
@@ -28,53 +29,21 @@ def ard(p):
class Test(unittest.TestCase):
    input_dim = 9
    num_inducing = 13
-    N = 300
+    N = 1000
    Nsamples = 1e6

    def setUp(self):
-        i_s_dim_list = [2,4,3]
-        indices = numpy.cumsum(i_s_dim_list).tolist()
-        input_slices = [slice(a,b) for a,b in zip([None]+indices, indices)]
-        #input_slices[2] = deepcopy(input_slices[1])
-        input_slice_kern = GPy.kern.kern(9,
-                [
-                 RBF(i_s_dim_list[0], np.random.rand(), np.random.rand(i_s_dim_list[0]), ARD=True),
-                 RBF(i_s_dim_list[1], np.random.rand(), np.random.rand(i_s_dim_list[1]), ARD=True),
-                 Linear(i_s_dim_list[2], np.random.rand(i_s_dim_list[2]), ARD=True)
-                ],
-                input_slices = input_slices
-                )
        self.kerns = (
-                # input_slice_kern,
-                # (GPy.kern.rbf(self.input_dim, ARD=True) +
-                #  GPy.kern.linear(self.input_dim, ARD=True) +
-                #  GPy.kern.bias(self.input_dim) +
-                #  GPy.kern.white(self.input_dim)),
-                (#GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
-                 GPy.kern.Linear(self.input_dim, np.random.rand(self.input_dim), ARD=True)
-                 +GPy.kern.RBF(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
-                # +GPy.kern.bias(self.input_dim)
-                # +GPy.kern.white(self.input_dim)),
-                ),
-                # (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
-                #  GPy.kern.bias(self.input_dim, np.random.rand())),
-                # (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
-                # +GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
-                # #+GPy.kern.bias(self.input_dim, np.random.rand())
-                # #+GPy.kern.white(self.input_dim, np.random.rand())),
-                # ),
-                # GPy.kern.white(self.input_dim, np.random.rand())),
-                # GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
-                # GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
-                # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
-                # GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
-                # GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
-                # GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
-                # GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim),
+                #GPy.kern.RBF([0,1,2], ARD=True)+GPy.kern.Bias(self.input_dim)+GPy.kern.White(self.input_dim),
+                #GPy.kern.RBF(self.input_dim)+GPy.kern.Bias(self.input_dim)+GPy.kern.White(self.input_dim),
+                #GPy.kern.Linear(self.input_dim) + GPy.kern.Bias(self.input_dim) + GPy.kern.White(self.input_dim),
+                #GPy.kern.Linear(self.input_dim, ARD=True) + GPy.kern.Bias(self.input_dim) + GPy.kern.White(self.input_dim),
+                GPy.kern.Linear([1,3,6,7], ARD=True) + GPy.kern.RBF([0,5,8], ARD=True) + GPy.kern.White(self.input_dim),
                )
-        self.q_x_mean = np.random.randn(self.input_dim)
-        self.q_x_variance = np.exp(np.random.randn(self.input_dim))
+        self.q_x_mean = np.random.randn(self.input_dim)[None]
+        self.q_x_variance = np.exp(.5*np.random.randn(self.input_dim))[None]
        self.q_x_samples = np.random.randn(self.Nsamples, self.input_dim) * np.sqrt(self.q_x_variance) + self.q_x_mean
+        self.q_x = NormalPosterior(self.q_x_mean, self.q_x_variance)
        self.Z = np.random.randn(self.num_inducing, self.input_dim)
        self.q_x_mean.shape = (1, self.input_dim)
        self.q_x_variance.shape = (1, self.input_dim)
@@ -114,8 +83,9 @@ class Test(unittest.TestCase):
    def test_psi2(self):
        for kern in self.kerns:
+            kern.randomize()
            Nsamples = int(np.floor(self.Nsamples/self.N))
-            psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
+            psi2 = kern.psi2(self.Z, self.q_x)
            K_ = np.zeros((self.num_inducing, self.num_inducing))
            diffs = []
            for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
@@ -130,8 +100,8 @@ class Test(unittest.TestCase):
                pylab.figure(msg)
                pylab.plot(diffs, marker='x', mew=.2)
            # print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
-            self.assertTrue(np.allclose(psi2.squeeze(), K_),
-                            #rtol=1e-1, atol=.1),
+            self.assertTrue(np.allclose(psi2.squeeze(), K_,
+                            atol=.1, rtol=1),
                            msg=msg + ": not matching")
            # sys.stdout.write(".")
        except:

View file

@@ -11,6 +11,7 @@ import itertools
 from GPy.core import Model
 from GPy.core.parameterization.param import Param
 from GPy.core.parameterization.transformations import Logexp
+from GPy.core.parameterization.variational import NormalPosterior

 class PsiStatModel(Model):
     def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
@@ -18,23 +19,24 @@ class PsiStatModel(Model):
        self.which = which
        self.X = Param("X", X)
        self.X_variance = Param('X_variance', X_variance, Logexp())
+        self.q = NormalPosterior(self.X, self.X_variance)
        self.Z = Param("Z", Z)
        self.N, self.input_dim = X.shape
        self.num_inducing, input_dim = Z.shape
        assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
        self.kern = kernel
-        self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
-        self.add_parameters(self.X, self.X_variance, self.Z, self.kern)
+        self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.q)
+        self.add_parameters(self.q, self.Z, self.kern)

    def log_likelihood(self):
        return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()

    def parameters_changed(self):
-        psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
+        psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.q)
        self.X.gradient = psimu
        self.X_variance.gradient = psiS
        #psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
-        try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
+        try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.q)
        except AttributeError: psiZ = numpy.zeros_like(self.Z)
        self.Z.gradient = psiZ
        #psiZ = numpy.ones(self.num_inducing * self.input_dim)
@@ -176,6 +178,6 @@ if __name__ == "__main__":
                +GPy.kern.White(input_dim)
                )
            )
-    m2.ensure_default_constraints()
+    #m2.ensure_default_constraints()
else:
    unittest.main()
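
The model now hands a single NormalPosterior q (wrapping X and X_variance) to the psi-statistics instead of separate mean and variance arrays. A hedged sketch of the calling convention this diff introduces (kernel choice and shapes are illustrative):

    import numpy as np
    import GPy
    from GPy.core.parameterization.variational import NormalPosterior

    mu = np.random.randn(10, 3)
    var = np.exp(np.random.randn(10, 3))
    q = NormalPosterior(mu, var)   # variational posterior over the inputs
    Z = np.random.randn(5, 3)      # inducing inputs
    k = GPy.kern.RBF(3, ARD=True)
    psi2 = k.psi2(Z, q)            # was k.psi2(Z, mu, var) before this change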

View file

@@ -34,7 +34,7 @@ class GradientTests(unittest.TestCase):
        model_fit = getattr(GPy.models, model_type)
        # noise = GPy.kern.White(dimension)
        kern = kern # + noise

        if uncertain_inputs:
            m = model_fit(X, Y, kernel=kern, X_variance=np.random.rand(X.shape[0], X.shape[1]))
        else:
@@ -60,13 +60,14 @@ class GradientTests(unittest.TestCase):
    def test_GPRegression_mlp_1d(self):
        ''' Testing the GP regression with mlp kernel with white kernel on 1d data '''
-        mlp = GPy.kern.mlp(1)
+        mlp = GPy.kern.MLP(1)
        self.check_model(mlp, model_type='GPRegression', dimension=1)

-    def test_GPRegression_poly_1d(self):
-        ''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
-        mlp = GPy.kern.Poly(1, degree=5)
-        self.check_model(mlp, model_type='GPRegression', dimension=1)
+    #TODO:
+    #def test_GPRegression_poly_1d(self):
+    #    ''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
+    #    mlp = GPy.kern.Poly(1, degree=5)
+    #    self.check_model(mlp, model_type='GPRegression', dimension=1)

    def test_GPRegression_matern52_1D(self):
        ''' Testing the GP regression with matern52 kernel on 1d data '''
@@ -163,14 +164,14 @@ class GradientTests(unittest.TestCase):
        rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
        self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)

-    #@unittest.expectedFailure
+    # @unittest.expectedFailure
    def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
        ''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
        rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
        raise unittest.SkipTest("This is not implemented yet!")
        self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)

-    #@unittest.expectedFailure
+    # @unittest.expectedFailure
    def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
        ''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
        rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
@@ -202,7 +203,7 @@ class GradientTests(unittest.TestCase):
        X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
        Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
        kernel = GPy.kern.RBF(1)
-        m = GPy.models.GPClassification(X,Y,kernel=kernel)
+        m = GPy.models.GPClassification(X, Y, kernel=kernel)
        m.update_likelihood_approximation()
        self.assertTrue(m.checkgrad())
@@ -212,11 +213,11 @@ class GradientTests(unittest.TestCase):
        Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
        Z = np.linspace(0, 15, 4)[:, None]
        kernel = GPy.kern.RBF(1)
-        m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
-        #distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
-        #likelihood = GPy.likelihoods.EP(Y, distribution)
-        #m = GPy.core.SparseGP(X, likelihood, kernel, Z)
-        #m.ensure_default_constraints()
+        m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z)
+        # distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
+        # likelihood = GPy.likelihoods.EP(Y, distribution)
+        # m = GPy.core.SparseGP(X, likelihood, kernel, Z)
+        # m.ensure_default_constraints()
        m.update_likelihood_approximation()
        self.assertTrue(m.checkgrad())
@@ -224,8 +225,8 @@ class GradientTests(unittest.TestCase):
        N = 20
        X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
        k = GPy.kern.RBF(1) + GPy.kern.White(1)
-        Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
-        m = GPy.models.FITCClassification(X, Y, kernel = k)
+        Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
+        m = GPy.models.FITCClassification(X, Y, kernel=k)
        m.update_likelihood_approximation()
        self.assertTrue(m.checkgrad())
@@ -238,7 +239,7 @@ class GradientTests(unittest.TestCase):
        Y = np.vstack((Y1, Y2))
        k1 = GPy.kern.RBF(1)
-        m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
+        m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
        m.constrain_fixed('.*rbf_var', 1.)
        self.assertTrue(m.checkgrad())

@@ -251,7 +252,7 @@ class GradientTests(unittest.TestCase):
        Y = np.vstack((Y1, Y2))
        k1 = GPy.kern.RBF(1)
-        m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
+        m = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
        m.constrain_fixed('.*rbf_var', 1.)
        self.assertTrue(m.checkgrad())

View file

@@ -14,6 +14,7 @@ import subarray_and_sorting
 import caching
 import diag
 import initialization
+import multioutput

 try:
     import sympy

View file

@@ -9,24 +9,27 @@ class Cacher(object):
    """
-    def __init__(self, operation, limit=5, ignore_args=()):
+    def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
        self.limit = int(limit)
        self.ignore_args = ignore_args
+        self.force_kwargs = force_kwargs
        self.operation=operation
        self.cached_inputs = []
        self.cached_outputs = []
        self.inputs_changed = []

-    def __call__(self, *args):
+    def __call__(self, *args, **kw):
        """
        A wrapper function for self.operation,
        """
        #ensure that specified arguments are ignored
+        items = sorted(kw.items(), key=lambda x: x[0])
+        oa_all = args + tuple(a for _,a in items)
        if len(self.ignore_args) != 0:
-            oa = [a for i,a in enumerate(args) if i not in self.ignore_args]
+            oa = [a for i,a in itertools.chain(enumerate(args), items) if i not in self.ignore_args and i not in self.force_kwargs]
        else:
-            oa = args
+            oa = oa_all

        # this makes sure we only add an observer once, and that None can be in args
        observable_args = []
@@ -37,36 +40,45 @@ class Cacher(object):
        #make sure that all the found argument really are observable:
        #otherswise don't cache anything, pass args straight though
        if not all([isinstance(arg, Observable) for arg in observable_args]):
-            return self.operation(*args)
+            return self.operation(*args, **kw)
+
+        if len(self.force_kwargs) != 0:
+            # check if there are force args, which force reloading
+            for k in self.force_kwargs:
+                if k in kw and kw[k] is not None:
+                    return self.operation(*args, **kw)

        # TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
-        # return self.operation(*args)
+        #return self.operation(*args)

        #if the result is cached, return the cached computation
        state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
-        if any(state):
-            i = state.index(True)
-            if self.inputs_changed[i]:
-                #(elements of) the args have changed since we last computed: update
-                self.cached_outputs[i] = self.operation(*args)
-                self.inputs_changed[i] = False
-            return self.cached_outputs[i]
-        else:
-            #first time we've seen these arguments: compute
-            #first make sure the depth limit isn't exceeded
-            if len(self.cached_inputs) == self.limit:
-                args_ = self.cached_inputs.pop(0)
-                [a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
-                self.inputs_changed.pop(0)
-                self.cached_outputs.pop(0)
-            #compute
-            self.cached_inputs.append(args)
-            self.cached_outputs.append(self.operation(*args))
-            self.inputs_changed.append(False)
-            [a.add_observer(self, self.on_cache_changed) for a in observable_args]
-            return self.cached_outputs[-1]#Max says return.
+        try:
+            if any(state):
+                i = state.index(True)
+                if self.inputs_changed[i]:
+                    #(elements of) the args have changed since we last computed: update
+                    self.cached_outputs[i] = self.operation(*args, **kw)
+                    self.inputs_changed[i] = False
+                return self.cached_outputs[i]
+            else:
+                #first time we've seen these arguments: compute
+                #first make sure the depth limit isn't exceeded
+                if len(self.cached_inputs) == self.limit:
+                    args_ = self.cached_inputs.pop(0)
+                    [a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
+                    self.inputs_changed.pop(0)
+                    self.cached_outputs.pop(0)
+                #compute
+                self.cached_inputs.append(oa_all)
+                self.cached_outputs.append(self.operation(*args, **kw))
+                self.inputs_changed.append(False)
+                [a.add_observer(self, self.on_cache_changed) for a in observable_args]
+                return self.cached_outputs[-1]#return
+        except:
+            raise
+        finally:
+            self.reset()

    def on_cache_changed(self, arg):
        """
@@ -76,7 +88,7 @@ class Cacher(object):
        """
        self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]

-    def reset(self, obj):
+    def reset(self):
        """
        Totally reset the cache
        """
@@ -90,15 +102,16 @@ class Cache_this(object):
    """
    A decorator which can be applied to bound methods in order to cache them
    """
-    def __init__(self, limit=5, ignore_args=()):
+    def __init__(self, limit=5, ignore_args=(), force_kwargs=()):
        self.limit = limit
        self.ignore_args = ignore_args
+        self.force_args = force_kwargs
        self.c = None
    def __call__(self, f):
-        def f_wrap(*args):
+        def f_wrap(*args, **kw):
            if self.c is None:
-                self.c = Cacher(f, self.limit, ignore_args=self.ignore_args)
-            return self.c(*args)
+                self.c = Cacher(f, self.limit, ignore_args=self.ignore_args, force_kwargs=self.force_args)
+            return self.c(*args, **kw)
        f_wrap._cacher = self
-        f_wrap.__doc__ = "**cached**\n\n" + (f.__doc__ or "")
+        f_wrap.__doc__ = "**cached**" + (f.__doc__ or "")
        return f_wrap
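
With keyword support threaded through to Cacher, force_kwargs gives a per-call escape hatch: whenever one of the named keywords is passed as non-None, the cache is bypassed and the operation recomputed. A hedged usage sketch (the class, method, and which_parts keyword are illustrative, and the GPy.util.caching import path is assumed):

    from GPy.util.caching import Cache_this

    class Demo(object):
        @Cache_this(limit=2, ignore_args=(), force_kwargs=('which_parts',))
        def K(self, X, which_parts=None):
            # cached on X; recomputed whenever which_parts is supplied
            return (X ** 2).sum()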

View file

@@ -32,6 +32,33 @@
      "details":"Artificially generated data of silhouettes given poses. Note that the data does not display a left/right ambiguity because across the entire data set one of the arms sticks out more the the other, disambiguating the pose as to which way the individual is facing.",
      "size":1
    },
+    "football_data":{
+      "files":[
+        [
+          "E0.csv", "E1.csv", "E2.csv", "E3.csv"
+        ]
+      ],
+      "citation":"",
+      "license":null,
+      "urls":[
+        "http://www.football-data.co.uk/mmz4281/"
+      ],
+      "details":"Results of English football matches since 1993/94 season.",
+      "size":1
+    },
+    "google_trends":{
+      "files":[
+        [
+        ]
+      ],
+      "citation":"",
+      "license":null,
+      "urls":[
+        "http://www.google.com/trends/"
+      ],
+      "details":"Google trends results.",
+      "size":0
+    },
    "osu_accad":{
      "files":[
        [

View file

@@ -1,5 +1,8 @@
+import csv
 import os
+import copy
 import numpy as np
+import pylab as pb
 import GPy
 import scipy.io
 import cPickle as pickle
@@ -7,6 +10,8 @@ import zipfile
 import tarfile
 import datetime
 import json
+import re

 ipython_available=True
 try:
     import IPython
@@ -32,11 +37,18 @@ neil_url = 'http://staffwww.dcs.shef.ac.uk/people/N.Lawrence/dataset_mirror/'
# Read data resources from json file.
# Don't do this when ReadTheDocs is scanning as it breaks things
on_rtd = os.environ.get('READTHEDOCS', None) == 'True' #Checks if RTD is scanning

if not (on_rtd):
    path = os.path.join(os.path.dirname(__file__), 'data_resources.json')
    json_data=open(path).read()
    data_resources = json.loads(json_data)

+if not (on_rtd):
+    path = os.path.join(os.path.dirname(__file__), 'football_teams.json')
+    json_data=open(path).read()
+    football_dict = json.loads(json_data)
+
def prompt_user(prompt):
    """Ask user for agreeing to data set licenses."""
@ -276,6 +288,74 @@ def della_gatta_TRP63_gene_expression(data_set='della_gatta', gene_number=None):
def football_data(season='1314', data_set='football_data'):
"""Football data from English games since 1993. This downloads data from football-data.co.uk for the given season. """
def league2num(string):
league_dict = {'E0':0, 'E1':1, 'E2': 2, 'E3': 3, 'EC':4}
return league_dict[string]
def football2num(string):
if football_dict.has_key(string):
return football_dict[string]
else:
football_dict[string] = len(football_dict)+1
return len(football_dict)+1
data_set_season = data_set + '_' + season
data_resources[data_set_season] = copy.deepcopy(data_resources[data_set])
data_resources[data_set_season]['urls'][0]+=season + '/'
start_year = int(year[0:2])
end_year = int(year[2:4])
files = ['E0.csv', 'E1.csv', 'E2.csv', 'E3.csv']
if start_year>4 and start_year < 93:
files += ['EC.csv']
data_resources[data_set_season]['files'] = [files]
if not data_available(data_set_season):
download_data(data_set_season)
for file in reversed(files):
filename = os.path.join(data_path, data_set_season, file)
# rewrite files removing blank rows.
writename = os.path.join(data_path, data_set_season, 'temp.csv')
input = open(filename, 'rb')
output = open(writename, 'wb')
writer = csv.writer(output)
for row in csv.reader(input):
if any(field.strip() for field in row):
writer.writerow(row)
input.close()
output.close()
table = np.loadtxt(writename,skiprows=1, usecols=(0, 1, 2, 3, 4, 5), converters = {0: league2num, 1: pb.datestr2num, 2:football2num, 3:football2num}, delimiter=',')
X = table[:, :4]
Y = table[:, 4:]
return data_details_return({'X': X, 'Y': Y}, data_set)
+# This will be for downloading google trends data.
+def google_trends(query_terms=['big data', 'machine learning', 'data science'], data_set='google_trends'):
+    """Data downloaded from Google trends for given query terms."""
+    # Inspired by this notebook:
+    # http://nbviewer.ipython.org/github/sahuguet/notebooks/blob/master/GoogleTrends%20meet%20Notebook.ipynb
+    # quote the query terms.
+    for i, element in enumerate(query_terms):
+        query_terms[i] = urllib2.quote(element)
+    query = 'http://www.google.com/trends/fetchComponent?q=%s&cid=TIMESERIES_GRAPH_0&export=3' % ",".join(query_terms)
+    data = urllib2.urlopen(query).read()
+    # In the notebook they did some data cleaning: remove Javascript header+footer, and translate new Date(....,..,..) into YYYY-MM-DD.
+    header = """// Data table response\ngoogle.visualization.Query.setResponse("""
+    data = data[len(header):-2]
+    data = re.sub('new Date\((\d+),(\d+),(\d+)\)', (lambda m: '"%s-%02d-%02d"' % (m.group(1).strip(), 1+int(m.group(2)), int(m.group(3)))), data)
+    timeseries = json.loads(data)
+    #import pandas as pd
+    columns = [k['label'] for k in timeseries['table']['cols']]
+    rows = map(lambda x: [k['v'] for k in x['c']], timeseries['table']['rows'])
+    terms = len(columns)-1
+    X = np.asarray([(pb.datestr2num(row[0]), i) for i in range(terms) for row in rows ])
+    Y = np.asarray([[row[i+1]] for i in range(terms) for row in rows ])
+    output_info = columns[1:]
+    return data_details_return({'X': X, 'Y': Y, 'query_terms': output_info, 'info': "Data downloaded from google trends with query terms: " + ', '.join(output_info) + '.'}, data_set)
# The data sets
def oil(data_set='three_phase_oil_flow'):
    """The three phase oil data from Bishop and James (1993)."""

View file

@@ -0,0 +1 @@
{"Canvey Island": 94, "Crewe": 21, "Fleetwood Town": 134, "Wrexham": 89, "Barnet": 69, "Ipswich": 29, "Rochdale": 84, "Bristol Rvs": 70, "Liverpool": 10, "Chelsea": 20, "York": 113, "Newcastle": 18, "QPR": 28, "Middlesboro": 116, "Tranmere": 68, "Bury": 72, "Luton": 24, "AFC Wimbledon": 126, "West Ham": 15, "Braintree Town": 135, "Bournemouth": 58, "Hayes & Yeading": 130, "Rushden & D": 81, "Weymouth": 120, "Chesterfield": 48, "Exeter": 104, "Barnsley": 45, "Aldershot": 95, "Gateshead": 129, "Hartlepool": 55, "Newport County": 132, "Crystal Palace": 23, "Ebbsfleet": 123, "Wigan": 19, "Shrewsbury": 83, "Hereford": 105, "Stevenage": 111, "Grimsby": 73, "Crawley Town": 114, "Morecambe": 109, "Oldham": 61, "Aston Villa": 1, "Bristol City": 51, "Gravesend": 103, "Huddersfield": 60, "Reading": 33, "Nuneaton Town": 140, "AFC Telford United": 137, "Wycombe": 91, "Leeds": 43, "Colchester": 54, "Rotherham": 63, "Southport": 100, "Southampton": 37, "Darlington": 82, "Blackburn": 16, "Bath City": 133, "Yeovil": 62, "Leyton Orient": 75, "Forest Green": 101, "Chester": 80, "Halifax": 110, "Portsmouth": 11, "Woking": 108, "Histon": 125, "Man City": 7, "Northampton": 78, "Arsenal": 17, "Charlton": 14, "Middlesbrough": 9, "Watford": 41, "Nott'm Forest": 59, "Eastbourne Borough": 131, "Hull": 27, "Barrow": 127, "Doncaster": 52, "Carlisle": 92, "Gillingham": 53, "Accrington": 93, "Dartford": 139, "Altrincham": 112, "Scarborough": 106, "Northwich": 117, "Farsley": 124, "Tamworth": 96, "St. Albans": 119, "Alfreton Town": 136, "Mansfield": 86, "Macclesfield": 76, "Torquay": 87, "Brighton": 26, "Bradford": 56, "Lincoln": 77, "Brentford": 49, "Everton": 3, "Cambridge": 102, "Sheffield United": 35, "Stockport": 85, "Bolton": 2, "Southend": 65, "Cheltenham": 71, "Walsall": 64, "Preston": 42, "Peterboro": 79, "Birmingham": 6, "Boston": 90, "Burton": 97, "West Brom": 8, "Man United": 4, "Stafford Rangers": 118, "Wimbledon": 115, "Scunthorpe": 50, "Kidderminster": 107, "Millwall": 44, "Swansea": 67, "Norwich": 31, "Burnley": 22, "Sunderland": 13, "Sheffield Weds": 40, "Fulham": 5, "Dag and Red": 99, "Oxford": 74, "Stoke": 39, "Tottenham": 12, "Kettering Town": 128, "Coventry": 32, "Wolves": 38, "Port Vale": 66, "Milton Keynes Dons": 57, "Plymouth": 34, "Derby": 25, "Notts County": 88, "Leicester": 36, "Droylsden": 121, "Blackpool": 47, "Salisbury": 122, "Cardiff": 30, "Grays": 98, "Swindon": 46, "Hyde United": 138}

View file

@@ -1,12 +1,17 @@
 import numpy as np
 import warnings
-from .. import kern
+import GPy

-def build_XY(input_list,output_list=None,index=None):
+def get_slices(input_list):
     num_outputs = len(input_list)
     _s = [0] + [ _x.shape[0] for _x in input_list ]
     _s = np.cumsum(_s)
     slices = [slice(a,b) for a,b in zip(_s[:-1],_s[1:])]
+    return slices
+
+def build_XY(input_list,output_list=None,index=None):
+    num_outputs = len(input_list)
     if output_list is not None:
         assert num_outputs == len(output_list)
         Y = np.vstack(output_list)
@@ -15,42 +20,84 @@ def build_XY(input_list,output_list=None,index=None):
    if index is not None:
        assert len(index) == num_outputs
-        I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,index)] )
+        I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,index)] )
    else:
-        I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,range(num_outputs))] )
+        I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,range(num_outputs))] )

    X = np.vstack(input_list)
-    X = np.hstack([X,I])
-    return X,Y,slices
+    X = np.hstack([X,I[:,None]])
+    return X,Y,I[:,None]#slices

-def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None):
+#TODO build_icm or build_lcm
+def build_likelihood(Y_list,noise_index,likelihoods_list=None):
+    Ny = len(Y_list)
+    if likelihoods_list is None:
+        likelihoods_list = [GPy.likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for y,j in zip(Y_list,range(Ny))]
+    else:
+        assert len(likelihoods_list) == Ny
+    #likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list, noise_index=noise_index)
+    likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list)
+    return likelihood
+
+def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='X'):
    """
-    Builds a kernel for a linear coregionalization model
+    Builds a kernel for an Intrinsic Coregionalization Model
+
+    :input_dim: Input dimensionality (does not include dimension of indices)
+    :num_outputs: Number of outputs
+    :param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
+    :type kernel: a GPy kernel
+    :param W_rank: number tuples of the corregionalization parameters 'W'
+    :type W_rank: integer
+    """
+    if kernel.input_dim <> input_dim:
+        kernel.input_dim = input_dim
+        warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
+    K = kernel.prod(GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B'),name=name)
+    #K = kernel * GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B')
+    #K = kernel ** GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa, name= 'B')
+    K['.*variance'] = 1.
+    K['.*variance'].fix()
+    return K
+
+def LCM(input_dim, num_outputs, kernels_list, W_rank=1,name='X'):
+    """
+    Builds a kernel for a Linear Coregionalization Model
+
+    :input_dim: Input dimensionality (does not include dimension of indices)
+    :num_outputs: Number of outputs
+    :param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
+    :type kernel: a GPy kernel
+    :param W_rank: number tuples of the corregionalization parameters 'W'
+    :type W_rank: integer
+    """
+    Nk = len(kernels_list)
+    K = ICM(input_dim,num_outputs,kernels_list[0],W_rank,name='%s%s' %(name,0))
+    j = 1
+    for kernel in kernels_list[1:]:
+        K += ICM(input_dim,num_outputs,kernel,W_rank,name='%s%s' %(name,j))
+        j += 1
+    return K
+
+def Private(input_dim, num_outputs, kernel, output, kappa=None,name='X'):
+    """
+    Builds a kernel for an Intrinsic Coregionalization Model
    :input_dim: Input dimensionality
    :num_outputs: Number of outputs
-    :param CK: List of coregionalized kernels (i.e., this will be multiplied by a coregionalize kernel).
-    :param K: List of kernels that will be added up together with CK, but won't be multiplied by a coregionalize kernel
-    :param W_columns: number tuples of the corregionalization parameters 'coregion_W'
-    :type W_columns: integer
+    :param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
+    :type kernel: a GPy kernel
+    :param W_rank: number tuples of the corregionalization parameters 'W'
+    :type W_rank: integer
    """
-    for k in CK:
-        if k.input_dim <> input_dim:
-            k.input_dim = input_dim
-            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
-    for k in NC:
-        if k.input_dim <> input_dim + 1:
-            k.input_dim = input_dim + 1
-            warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
-    kernel = CK[0].prod(kern.Coregionalize(num_outputs,W_columns,W,kappa),tensor=True)
-    for k in CK[1:]:
-        k_coreg = kern.Coregionalize(num_outputs,W_columns,W,kappa)
-        kernel += k.prod(k_coreg,tensor=True)
-    for k in NC:
-        kernel += k
-    return kernel
+    K = ICM(input_dim,num_outputs,kernel,W_rank=1,kappa=kappa,name=name)
+    K.B.W.fix(0)
+    _range = range(num_outputs)
+    _range.pop(output)
+    for j in _range:
+        K.B.kappa[j] = 0
+        K.B.kappa[j].fix()
+    return K
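
Taken together with build_XY, the new builders cover the common multi-output construction: stack the per-output inputs, append an index column, and let ICM act on that column through its Coregionalize part. A hedged sketch (shapes are illustrative, and GPy.util.multioutput is assumed to be this module's import location):

    import numpy as np
    import GPy
    from GPy.util.multioutput import ICM, build_XY

    X1, X2 = np.random.rand(10, 1), np.random.rand(15, 1)
    Y1, Y2 = np.sin(X1), np.cos(X2)
    X, Y, index = build_XY([X1, X2], [Y1, Y2])  # X gains an output-index column
    K = ICM(input_dim=1, num_outputs=2, kernel=GPy.kern.RBF(1))
    print K.K(X).shape  # (25, 25): one block per output pair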