Merge branch 'params' of https://github.com/SheffieldML/GPy into params

This commit is contained in:
Neil Lawrence 2014-03-13 15:59:34 +00:00
commit 9562477562
46 changed files with 980 additions and 701 deletions

View file

@ -27,7 +27,7 @@ class GP(Model):
"""
def __init__(self, X, Y, kernel, likelihood, inference_method=None, Y_metadata=None, name='gp'):
def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', **Y_metadata):
super(GP, self).__init__(name)
assert X.ndim == 2
@ -43,12 +43,12 @@ class GP(Model):
_, self.output_dim = self.Y.shape
if Y_metadata is not None:
self.Y_metadata = ObservableArray(Y_metadata)
self.Y_metadata = Y_metadata
else:
self.Y_metadata = None
assert isinstance(kernel, kern.Kern)
assert self.input_dim == kernel.input_dim
#assert self.input_dim == kernel.input_dim
self.kern = kernel
assert isinstance(likelihood, likelihoods.Likelihood)
@ -56,7 +56,7 @@ class GP(Model):
#find a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
inference_method = exact_gaussian_inference.ExactGaussianInference()
else:
inference_method = expectation_propagation
@ -67,8 +67,9 @@ class GP(Model):
self.add_parameter(self.likelihood)
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, Y_metadata=self.Y_metadata)
self.kern.update_gradients_full(grad_dict['dL_dK'], self.X)
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y, **self.Y_metadata)
self.likelihood.update_gradients(np.diag(self.grad_dict['dL_dK']), **self.Y_metadata)
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
def log_likelihood(self):
return self._log_marginal_likelihood
@ -185,7 +186,7 @@ class GP(Model):
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
models_plots.plot_fit_f(self,*args,**kwargs)
return models_plots.plot_fit_f(self,*args,**kwargs)
def plot(self, *args, **kwargs):
"""
@ -206,7 +207,7 @@ class GP(Model):
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
models_plots.plot_fit(self,*args,**kwargs)
return models_plots.plot_fit(self,*args,**kwargs)
def _getstate(self):
"""

View file

@ -271,7 +271,7 @@ class Model(Parameterized):
and numerical gradients is within <tolerance> of unity.
"""
x = self._get_params_transformed().copy()
if not verbose:
# make sure only to test the selected parameters
if target_param is None:
@ -298,12 +298,12 @@ class Model(Parameterized):
dx = dx[transformed_index]
gradient = gradient[transformed_index]
denominator = (2 * np.dot(dx, gradient))
global_ratio = (f1 - f2) / np.where(denominator==0., 1e-32, denominator)
gloabl_diff = (f1 - f2) - denominator
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gloabl_diff) < tolerance)
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gloabl_diff) == 0)
else:
# check the gradient of each parameter individually, and do some pretty printing
try:
@ -339,7 +339,7 @@ class Model(Parameterized):
print "No free parameters to check"
return
gradient = self.objective_function_gradients(x)
gradient = self.objective_function_gradients(x).copy()
np.where(gradient == 0, 1e-312, gradient)
ret = True
for nind, xind in itertools.izip(param_index, transformed_index):
@ -366,7 +366,7 @@ class Model(Parameterized):
ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string
self._set_params_transformed(x)
return ret

View file

@ -6,12 +6,6 @@ __updated__ = '2013-12-16'
import numpy as np
from parameter_core import Observable
class _Array(np.ndarray):
def __init__(self, dtype=float, buffer=None, offset=0,
strides=None, order=None, *args, **kwargs):
super(_Array, self).__init__(dtype=dtype, buffer=buffer, offset=offset,
strides=strides, order=order, *args, **kwargs)
class ObservableArray(np.ndarray, Observable):
"""
An ndarray which reports changes to its observers.
@ -22,7 +16,7 @@ class ObservableArray(np.ndarray, Observable):
__array_priority__ = -1 # Never give back ObservableArray
def __new__(cls, input_array, *a, **kw):
if not isinstance(input_array, ObservableArray):
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['C', 'W'])).view(cls)
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
else: obj = input_array
cls.__name__ = "ObservableArray\n "
super(ObservableArray, obj).__init__(*a, **kw)

View file

@ -94,15 +94,15 @@ class Param(OptimizationHandlable, ObservableArray):
@property
def _param_array_(self):
return self
@property
def gradient(self):
return self._gradient_array_[self._current_slice_]
@gradient.setter
def gradient(self, val):
self.gradient[:] = val
#===========================================================================
# Pickling operations
#===========================================================================
@ -135,7 +135,7 @@ class Param(OptimizationHandlable, ObservableArray):
self._parent_index_ = state.pop()
self._parent_ = state.pop()
self.name = state.pop()
def copy(self, *args):
constr = self.constraints.copy()
priors = self.priors.copy()
@ -151,13 +151,13 @@ class Param(OptimizationHandlable, ObservableArray):
# if trigger_parent: min_priority = None
# else: min_priority = -numpy.inf
# self.notify_observers(None, min_priority)
#
#
# def _get_params(self):
# return self.flat
#
#
# def _collect_gradient(self, target):
# target += self.gradient.flat
#
#
# def _set_gradient(self, g):
# self.gradient = g.reshape(self._realshape_)
@ -173,10 +173,10 @@ class Param(OptimizationHandlable, ObservableArray):
try: new_arr._current_slice_ = s; new_arr._original_ = self.base is new_arr.base
except AttributeError: pass # returning 0d array or float, double etc
return new_arr
def __setitem__(self, s, val):
super(Param, self).__setitem__(s, val)
#===========================================================================
# Index Operations:
#===========================================================================
@ -195,7 +195,7 @@ class Param(OptimizationHandlable, ObservableArray):
a = self._realshape_[i] + a
internal_offset += a * extended_realshape[i]
return internal_offset
def _raveled_index(self, slice_index=None):
# return an index array on the raveled array, which is formed by the current_slice
# of this object
@ -203,7 +203,7 @@ class Param(OptimizationHandlable, ObservableArray):
ind = self._indices(slice_index)
if ind.ndim < 2: ind = ind[:, None]
return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
def _expand_index(self, slice_index=None):
# this calculates the full indexing arrays from the slicing objects given by get_item for _real..._ attributes
# it basically translates slices to their respective index arrays and turns negative indices around
@ -245,7 +245,7 @@ class Param(OptimizationHandlable, ObservableArray):
#===========================================================================
@property
def _description_str(self):
if self.size <= 1:
if self.size <= 1:
return [str(self.view(numpy.ndarray)[0])]
else: return [str(self.shape)]
def parameter_names(self, add_self=False, adjust_for_printing=False):
@ -356,7 +356,7 @@ class ParamConcatenation(object):
self._param_sizes = [p.size for p in self.params]
startstops = numpy.cumsum([0] + self._param_sizes)
self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])]
parents = dict()
for p in self.params:
if p.has_parent():
@ -396,7 +396,7 @@ class ParamConcatenation(object):
def update_all_params(self):
for par in self.parents:
par.notify_observers(-numpy.inf)
def constrain(self, constraint, warning=True):
[param.constrain(constraint, trigger_parent=False) for param in self.params]
self.update_all_params()

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Core module for parameterization.
Core module for parameterization.
This module implements all parameterization techniques, split up in modular bits.
HierarchyError:
@ -15,9 +15,8 @@ Observable Pattern for patameterization
from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np
import itertools
__updated__ = '2013-12-16'
__updated__ = '2014-03-13'
class HierarchyError(Exception):
"""
@ -35,18 +34,19 @@ def adjust_name_for_printing(name):
class Observable(object):
"""
Observable pattern for parameterization.
This Object allows for observers to register with self and a (bound!) function
as an observer. Every time the observable changes, it sends a notification with
self as only argument to all its observers.
"""
_updated = True
def __init__(self, *args, **kwargs):
super(Observable, self).__init__()
self._observer_callables_ = []
def add_observer(self, observer, callble, priority=0):
self._insert_sorted(priority, observer, callble)
def remove_observer(self, observer, callble=None):
to_remove = []
for p, obs, clble in self._observer_callables_:
@ -58,15 +58,15 @@ class Observable(object):
to_remove.append((p, obs, clble))
for r in to_remove:
self._observer_callables_.remove(r)
def notify_observers(self, which=None, min_priority=None):
"""
Notifies all observers. Which is the element, which kicked off this
Notifies all observers. Which is the element, which kicked off this
notification loop.
NOTE: notifies only observers with priority p > min_priority!
^^^^^^^^^^^^^^^^
:param which: object, which started this notification loop
:param min_priority: only notify observers with priority > min_priority
if min_priority is None, notify all observers in order
@ -88,14 +88,14 @@ class Observable(object):
break
ins += 1
self._observer_callables_.insert(ins, (p, o, c))
class Pickleable(object):
"""
Make an object pickleable (See python doc 'pickling').
Make an object pickleable (See python doc 'pickling').
This class allows for pickling support by Memento pattern.
_getstate returns a memento of the class, which gets pickled.
_setstate(<memento>) (re-)sets the state of the class to the memento
_setstate(<memento>) (re-)sets the state of the class to the memento
"""
#===========================================================================
# Pickling operations
@ -112,14 +112,14 @@ class Pickleable(object):
with open(f, 'w') as f:
cPickle.dump(self, f, protocol)
else:
cPickle.dump(self, f, protocol)
cPickle.dump(self, f, protocol)
def __getstate__(self):
if self._has_get_set_state():
return self._getstate()
return self.__dict__
def __setstate__(self, state):
if self._has_get_set_state():
self._setstate(state)
self._setstate(state)
# TODO: maybe parameters_changed() here?
return
self.__dict__ = state
@ -153,7 +153,7 @@ class Pickleable(object):
class Parentable(object):
"""
Enable an Object to have a parent.
Additionally this adds the parent_index, which is the index for the parent
to look for in its parameter list.
"""
@ -161,7 +161,7 @@ class Parentable(object):
_parent_index_ = None
def __init__(self, *args, **kwargs):
super(Parentable, self).__init__()
def has_parent(self):
"""
Return whether this parentable object currently has a parent.
@ -201,20 +201,20 @@ class Gradcheckable(Parentable):
Adds the functionality for an object to be gradcheckable.
It is just a thin wrapper of a call to the highest parent for now.
TODO: Can be done better, by only changing parameters of the current parameter handle,
such that object hierarchy only has to change for those.
such that object hierarchy only has to change for those.
"""
def __init__(self, *a, **kw):
super(Gradcheckable, self).__init__(*a, **kw)
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
"""
Check the gradient of this parameter with respect to the highest parent's
Check the gradient of this parameter with respect to the highest parent's
objective function.
This is a three point estimate of the gradient, wiggling at the parameters
with a stepsize step.
The check passes if either the ratio or the difference between numerical and
The check passes if either the ratio or the difference between numerical and
analytical gradient is smaller then tolerance.
:param bool verbose: whether each parameter shall be checked individually.
:param float step: the stepsize for the numerical three point gradient estimate.
:param flaot tolerance: the tolerance for the gradient ratio or difference.
@ -222,12 +222,13 @@ class Gradcheckable(Parentable):
if self.has_parent():
return self._highest_parent_._checkgrad(self, verbose=verbose, step=step, tolerance=tolerance)
return self._checkgrad(self[''], verbose=verbose, step=step, tolerance=tolerance)
def _checkgrad(self, param):
def _checkgrad(self, param, verbose=0, step=1e-6, tolerance=1e-3):
"""
Perform the checkgrad on the model.
TODO: this can be done more efficiently, when doing it inside here
"""
raise NotImplementedError, "Need log likelihood to check gradient against"
raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!"
class Nameable(Gradcheckable):
@ -258,7 +259,7 @@ class Nameable(Gradcheckable):
def hierarchy_name(self, adjust_for_printing=True):
"""
return the name for this object with the parents names attached by dots.
:param bool adjust_for_printing: whether to call :func:`~adjust_for_printing()`
on the names, recursively
"""
@ -275,21 +276,21 @@ class Indexable(object):
"""
def __init__(self, *a, **kw):
super(Indexable, self).__init__()
def _raveled_index(self):
"""
Flattened array of ints, specifying the index of this object.
This has to account for shaped parameters!
"""
raise NotImplementedError, "Need to be able to get the raveled Index"
def _internal_offset(self):
"""
The offset for this parameter inside its parent.
The offset for this parameter inside its parent.
This has to account for shaped parameters!
"""
return 0
def _offset_for(self, param):
"""
Return the offset of the param inside this parameterized object.
@ -297,35 +298,35 @@ class Indexable(object):
basically just sums up the parameter sizes which come before param.
"""
raise NotImplementedError, "shouldnt happen, offset required from non parameterization object?"
def _raveled_index_for(self, param):
"""
get the raveled index for a param
that is an int array, containing the indexes for the flattened
param inside this parameterized logic.
"""
raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
raise NotImplementedError, "shouldnt happen, raveld index transformation required from non parameterization object?"
class Constrainable(Nameable, Indexable):
class Constrainable(Nameable, Indexable, Observable):
"""
Make an object constrainable with Priors and Transformations.
TODO: Mappings!!
Adding a constraint to a Parameter means to tell the highest parent that
the constraint was added and making sure that all parameters covered
by this object are indeed conforming to the constraint.
:func:`constrain()` and :func:`unconstrain()` are main methods here
"""
def __init__(self, name, default_constraint=None, *a, **kw):
super(Constrainable, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)
super(Constrainable, self).__init__(name=name, *a, **kw)
self._default_constraint_ = default_constraint
from index_operations import ParameterIndexOperations
self.constraints = ParameterIndexOperations()
self.priors = ParameterIndexOperations()
if self._default_constraint_ is not None:
self.constrain(self._default_constraint_)
def _disconnect_parent(self, constr=None, *args, **kw):
"""
From Parentable:
@ -339,7 +340,7 @@ class Constrainable(Nameable, Indexable):
self._parent_index_ = None
self._connect_fixes()
self._notify_parent_change()
#===========================================================================
# Fixing Parameters:
#===========================================================================
@ -351,24 +352,26 @@ class Constrainable(Nameable, Indexable):
"""
if value is not None:
self[:] = value
self.constrain(__fixed__, warning=warning, trigger_parent=trigger_parent)
reconstrained = self.unconstrain()
self._add_to_index_operations(self.constraints, reconstrained, __fixed__, warning)
rav_i = self._highest_parent_._raveled_index_for(self)
self._highest_parent_._set_fixed(rav_i)
self.notify_observers(self, None if trigger_parent else -np.inf)
fix = constrain_fixed
def unconstrain_fixed(self):
"""
This parameter will no longer be fixed.
"""
unconstrained = self.unconstrain(__fixed__)
self._highest_parent_._set_unfixed(unconstrained)
self._highest_parent_._set_unfixed(unconstrained)
unfix = unconstrain_fixed
def _set_fixed(self, index):
if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
self._fixes_[index] = FIXED
if np.all(self._fixes_): self._fixes_ = None # ==UNFIXED
def _set_unfixed(self, index):
if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
# rav_i = self._raveled_index_for(param)[index]
@ -382,7 +385,7 @@ class Constrainable(Nameable, Indexable):
self._fixes_[fixed_indices] = FIXED
else:
self._fixes_ = None
def _has_fixes(self):
return hasattr(self, "_fixes_") and self._fixes_ is not None
@ -397,21 +400,21 @@ class Constrainable(Nameable, Indexable):
"""
repriorized = self.unset_priors()
self._add_to_index_operations(self.priors, repriorized, prior, warning)
def unset_priors(self, *priors):
"""
Un-set all priors given from this parameter handle.
"""
return self._remove_from_index_operations(self.priors, priors)
def log_prior(self):
"""evaluate the prior"""
if self.priors.size > 0:
x = self._get_params()
return reduce(lambda a, b: a + b, [p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()], 0)
return 0.
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
if self.priors.size > 0:
@ -420,7 +423,7 @@ class Constrainable(Nameable, Indexable):
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
return ret
return 0.
#===========================================================================
# Constrain operations -> done
#===========================================================================
@ -434,10 +437,10 @@ class Constrainable(Nameable, Indexable):
Constrain the parameter to the given
:py:class:`GPy.core.transformations.Transformation`.
"""
if isinstance(transform, Transformation):
self._param_array_[:] = transform.initialize(self._param_array_)
self._param_array_[:] = transform.initialize(self._param_array_)
reconstrained = self.unconstrain()
self._add_to_index_operations(self.constraints, reconstrained, transform, warning)
self.notify_observers(self, None if trigger_parent else -np.inf)
def unconstrain(self, *transforms):
"""
@ -447,7 +450,7 @@ class Constrainable(Nameable, Indexable):
transformats of this parameter object.
"""
return self._remove_from_index_operations(self.constraints, transforms)
def constrain_positive(self, warning=True, trigger_parent=True):
"""
:param warning: print a warning if re-constraining parameters.
@ -492,7 +495,7 @@ class Constrainable(Nameable, Indexable):
Remove (lower, upper) bounded constrain from this parameter/
"""
self.unconstrain(Logistic(lower, upper))
def _parent_changed(self, parent):
"""
From Parentable:
@ -521,7 +524,7 @@ class Constrainable(Nameable, Indexable):
def _remove_from_index_operations(self, which, what):
"""
Helper preventing copy code.
Remove given what (transform prior etc) from which param index ops.
Remove given what (transform prior etc) from which param index ops.
"""
if len(what) == 0:
transforms = which.properties()
@ -531,10 +534,10 @@ class Constrainable(Nameable, Indexable):
removed = np.union1d(removed, unconstrained)
if t is __fixed__:
self._highest_parent_._set_unfixed(unconstrained)
return removed
class OptimizationHandlable(Constrainable, Observable):
class OptimizationHandlable(Constrainable):
"""
This enables optimization handles on an Object as done in GPy 0.4.
@ -542,13 +545,13 @@ class OptimizationHandlable(Constrainable, Observable):
"""
def __init__(self, name, default_constraint=None, *a, **kw):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
def transform(self):
[np.put(self._param_array_, ind, c.finv(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def untransform(self):
[np.put(self._param_array_, ind, c.f(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def _get_params_transformed(self):
# transformed parameters (apply transformation rules)
p = self._param_array_.copy()
@ -564,23 +567,21 @@ class OptimizationHandlable(Constrainable, Observable):
else: self._param_array_[:] = p
self.untransform()
self._trigger_params_changed()
def _trigger_params_changed(self, trigger_parent=True):
[p._trigger_params_changed(trigger_parent=False) for p in self._parameters_]
if trigger_parent: min_priority = None
else: min_priority = -np.inf
self.notify_observers(None, min_priority)
self.notify_observers(None, None if trigger_parent else -np.inf)
def _size_transformed(self):
return self.size - self.constraints[__fixed__].size
#
#
# def _untransform_params(self, p):
# # inverse apply transformations for parameters
# #p = p.copy()
# if self._has_fixes(): tmp = self._get_params(); tmp[self._fixes_] = p; p = tmp; del tmp
# [np.put(p, ind, c.f(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
# return p
#
#
# def _get_params(self):
# """
# get all parameters
@ -591,7 +592,7 @@ class OptimizationHandlable(Constrainable, Observable):
# return p
# [np.put(p, ind, par._get_params()) for ind, par in itertools.izip(self._param)]
# return p
# def _set_params(self, params, trigger_parent=True):
# self._param_array_.flat = params
# if trigger_parent: min_priority = None
@ -599,14 +600,14 @@ class OptimizationHandlable(Constrainable, Observable):
# self.notify_observers(None, min_priority)
# don't overwrite this anymore!
#raise NotImplementedError, "Abstract superclass: This needs to be implemented in Param and Parameterizable"
#===========================================================================
# Optimization handles:
#===========================================================================
def _get_param_names(self):
n = np.array([p.hierarchy_name() + '[' + str(i) + ']' for p in self.flattened_parameters for i in p._indices()])
return n
def _get_param_names_transformed(self):
n = self._get_param_names()
if self._has_fixes():
@ -620,7 +621,7 @@ class OptimizationHandlable(Constrainable, Observable):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from given random generator
:param rand_gen: numpy random number generator which takes args and kwargs
:param flaot loc: loc parameter for random number generator
:param float scale: scale parameter for random number generator
@ -643,10 +644,10 @@ class OptimizationHandlable(Constrainable, Observable):
self._param_array_[pislice] = pi._param_array_.ravel()#, requirements=['C', 'W']).flat
self._gradient_array_[pislice] = pi._gradient_array_.ravel()#, requirements=['C', 'W']).flat
pi._param_array_.data = parray[pislice].data
pi._gradient_array_.data = garray[pislice].data
pi._propagate_param_grad(parray[pislice], garray[pislice])
pi_old_size += pi.size
@ -659,11 +660,11 @@ class Parameterizable(OptimizationHandlable):
self._param_array_ = np.empty(self.size, dtype=np.float64)
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
self._added_names_ = set()
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
"""
Get the names of all parameters of this model.
Get the names of all parameters of this model.
:param bool add_self: whether to add the own name in front of names
:param bool adjust_for_printing: whether to call `adjust_name_for_printing` on names
:param bool recursive: whether to traverse through hierarchy and append leaf node names
@ -674,11 +675,11 @@ class Parameterizable(OptimizationHandlable):
else: names = [adjust(x.name) for x in self._parameters_]
if add_self: names = map(lambda x: adjust(self.name) + "." + x, names)
return names
@property
def num_params(self):
return len(self._parameters_)
def _add_parameter_name(self, param, ignore_added_names=False):
pname = adjust_name_for_printing(param.name)
if ignore_added_names:
@ -693,7 +694,11 @@ class Parameterizable(OptimizationHandlable):
elif pname not in dir(self):
self.__dict__[pname] = param
self._added_names_.add(pname)
else:
print "WARNING: added a parameter with formatted name {}, which is already a member of {} object. Trying to change the parameter name to\n {}".format(pname, self.__class__, param.name+"_")
param.name += "_"
self._add_parameter_name(param, ignore_added_names)
def _remove_parameter_name(self, param=None, pname=None):
assert param is None or pname is None, "can only delete either param by name, or the name of a param"
pname = adjust_name_for_printing(pname) or adjust_name_for_printing(param.name)
@ -705,14 +710,14 @@ class Parameterizable(OptimizationHandlable):
def _name_changed(self, param, old_name):
self._remove_parameter_name(None, old_name)
self._add_parameter_name(param)
#=========================================================================
# Gradient handling
#=========================================================================
@property
def gradient(self):
return self._gradient_array_
return self._gradient_array_
@gradient.setter
def gradient(self, val):
self._gradient_array_[:] = val
@ -733,13 +738,13 @@ class Parameterizable(OptimizationHandlable):
# def _set_gradient(self, g):
# [p._set_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_)]
#===========================================================================
def add_parameter(self, param, index=None, _ignore_added_names=False):
"""
:param parameters: the parameters to add
:type parameters: list of or one :py:class:`GPy.core.param.Param`
:param [index]: index of where to put parameters
:param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field
Add all parameters to this param class, you can insert parameters
@ -770,9 +775,9 @@ class Parameterizable(OptimizationHandlable):
self.constraints.update(param.constraints, start)
self.priors.update(param.priors, start)
self._parameters_.insert(index, param)
param.add_observer(self, self._pass_through_notify_observers, -np.inf)
self.size += param.size
self._connect_parameters(ignore_added_names=_ignore_added_names)
@ -795,65 +800,62 @@ class Parameterizable(OptimizationHandlable):
"""
if not param in self._parameters_:
raise RuntimeError, "Parameter {} does not belong to this object, remove parameters directly from their respective parents".format(param._short())
start = sum([p.size for p in self._parameters_[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size
del self._parameters_[param._parent_index_]
param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers)
self.constraints.shift_left(start, param.size)
self._connect_fixes()
self._connect_parameters()
self._notify_parent_change()
parent = self._parent_
while parent is not None:
parent._connect_fixes()
parent._connect_parameters()
parent._notify_parent_change()
parent = parent._parent_
def _connect_parameters(self, ignore_added_names=False):
# connect parameterlist to this parameterized object
# This just sets up the right connection for the params objects
# to be used as parameters
# it also sets the constraints for each parameter to the constraints
# of their respective parents
# it also sets the constraints for each parameter to the constraints
# of their respective parents
if not hasattr(self, "_parameters_") or len(self._parameters_) < 1:
# no parameters for this class
return
old_size = 0
self._param_array_ = np.empty(self.size, dtype=np.float64)
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
self._param_slices_ = []
for i, p in enumerate(self._parameters_):
p._parent_ = self
p._parent_index_ = i
pslice = slice(old_size, old_size+p.size)
# first connect all children
p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
# then connect children to self
self._param_array_[pslice] = p._param_array_.ravel()#, requirements=['C', 'W']).ravel(order='C')
self._gradient_array_[pslice] = p._gradient_array_.ravel()#, requirements=['C', 'W']).ravel(order='C')
if not p._param_array_.flags['C_CONTIGUOUS']:
import ipdb;ipdb.set_trace()
p._param_array_.data = self._param_array_[pslice].data
p._gradient_array_.data = self._gradient_array_[pslice].data
self._param_slices_.append(pslice)
self._add_parameter_name(p, ignore_added_names=ignore_added_names)
old_size += p.size
#===========================================================================
# notification system
#===========================================================================
@ -861,12 +863,13 @@ class Parameterizable(OptimizationHandlable):
self.parameters_changed()
def _pass_through_notify_observers(self, which):
self.notify_observers(which)
#===========================================================================
# TODO: not working yet
#===========================================================================
def copy(self):
"""Returns a (deep) copy of the current model"""
raise NotImplementedError, "Copy is not yet implemented, TODO: Observable hierarchy"
import copy
from .index_operations import ParameterIndexOperations, ParameterIndexOperationsView
from .lists_and_dicts import ArrayList
@ -880,7 +883,7 @@ class Parameterizable(OptimizationHandlable):
dc[k] = copy.deepcopy(v)
if k == '_parameters_':
params = [p.copy() for p in v]
dc['_parent_'] = None
dc['_parent_index_'] = None
dc['_observer_callables_'] = []
@ -891,12 +894,12 @@ class Parameterizable(OptimizationHandlable):
s = self.__new__(self.__class__)
s.__dict__ = dc
for p in params:
s.add_parameter(p, _ignore_added_names=True)
return s
#===========================================================================
# From being parentable, we have to define the parent_change notification
#===========================================================================

View file

@ -101,7 +101,6 @@ class Parameterized(Parameterizable, Pickleable):
return G
return node
def _getstate(self):
"""
Get the current state of the class,

View file

@ -21,7 +21,7 @@ class VariationalPrior(Parameterized):
updates the gradients for mean and variance **in place**
"""
raise NotImplementedError, "override this for variational inference of latent space"
class NormalPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
var_mean = np.square(variational_posterior.mean).sum()
@ -71,6 +71,7 @@ class VariationalPosterior(Parameterized):
self.shape = self.mean.shape
self.num_data, self.input_dim = self.mean.shape
self.add_parameters(self.mean, self.variance)
self.num_data, self.input_dim = self.mean.shape
if self.has_uncertain_inputs():
assert self.variance.shape == self.mean.shape, "need one variance per sample and dimenion"
@ -125,7 +126,7 @@ class SpikeAndSlabPosterior(VariationalPosterior):
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob, Logistic(1e-10,1.-1e-10))
self.add_parameter(self.gamma)
def plot(self, *args):
"""
Plot latent space X in 1D:

View file

@ -64,8 +64,8 @@ class SparseGP(GP):
self.kern.gradient += target
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else:
#gradients wrt kernel
@ -77,8 +77,8 @@ class SparseGP(GP):
self.kern.gradient += target
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, full_cov=False):
"""

View file

@ -0,0 +1,30 @@
import numpy as np
import pylab as pb
import GPy
pb.ion()
X1 = 100 * np.random.rand(100)[:,None]
X2 = 100 * np.random.rand(100)[:,None]
#X1.sort()
#X2.sort()
Y1 = np.sin(X1/10.) + np.random.rand(100)[:,None]
Y2 = np.cos(X2/10.) + np.random.rand(100)[:,None]
Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")]
kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=12,kernels_list=Mlist,name='H')
m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern)
m.optimize()
fig = pb.figure()
ax0 = fig.add_subplot(211)
ax1 = fig.add_subplot(212)
slices = GPy.util.multioutput.get_slices([Y1,Y2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0)
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1)

View file

@ -468,7 +468,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
"""Run a 1D example of a sparse GP regression with uncertain inputs."""
fig, axes = pb.subplots(1, 2, figsize=(12, 5))
fig, axes = pb.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
# sample inputs and outputs
S = np.ones((20, 1))

View file

@ -19,7 +19,7 @@ class DTC(object):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y):
def inference(self, kern, X, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this!
@ -80,10 +80,6 @@ class DTC(object):
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
#construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

View file

@ -33,7 +33,7 @@ class ExactGaussianInference(object):
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
raise NotImplementedError, 'TODO' #TODO
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
def inference(self, kern, X, likelihood, Y, **Y_metadata):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
@ -41,7 +41,7 @@ class ExactGaussianInference(object):
K = kern.K(X)
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, **Y_metadata))
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
@ -49,9 +49,4 @@ class ExactGaussianInference(object):
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
#TODO: does this really live here?
likelihood.update_gradients(np.diag(dL_dK))
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK}

View file

@ -17,8 +17,7 @@ class FITC(object):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y):
assert X_variance is None, "cannot use X_variance with FITC. Try varDTC."
def inference(self, kern, X, Z, likelihood, Y):
#TODO: MAX! fix this!
from ...util.misc import param_to_array
@ -81,11 +80,7 @@ class FITC(object):
dL_dU *= beta_star
dL_dU -= 2.*KiU*dL_dR
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T}
#update gradients
kern.update_gradients_sparse(X=X, Z=Z, **grad_dict)
likelihood.update_gradients(dL_dR)
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'partial_for_likelihood':dL_dR}
#construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

View file

@ -3,6 +3,7 @@
from posterior import Posterior
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
import numpy as np
from ...util.misc import param_to_array
@ -28,7 +29,7 @@ class VarDTC(object):
def set_limit(self, limit):
self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y):
return param_to_array(np.sum(np.square(Y)))
@ -77,10 +78,10 @@ class VarDTC(object):
num_inducing = Z.shape[0]
num_data = Y.shape[0]
# kernel computations, using BGPLVM notation
Kmm = kern.K(Z) +np.eye(Z.shape[0]) * self.const_jitter
Lm = jitchol(Kmm+np.eye(Z.shape[0])*self.const_jitter)
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
# The rather complex computations of A
if uncertain_inputs:
@ -169,7 +170,6 @@ class VarDTC(object):
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
from ...util import diag
diag.add(Bi, 1)
woodbury_inv = backsub_both_sides(Lm, Bi)
@ -238,7 +238,8 @@ class VarDTCMissingData(object):
dL_dKmm = 0
log_marginal = 0
Kmm = kern.K(Z)
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
#factor Kmm
Lm = jitchol(Kmm)
if uncertain_inputs: LmInv = dtrtri(Lm)
@ -324,7 +325,6 @@ class VarDTCMissingData(object):
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
from ...util import diag
diag.add(Bi, 1)
woodbury_inv_all[:, :, ind] = backsub_both_sides(Lm, Bi)[:,:,None]

View file

@ -3,50 +3,47 @@
import numpy as np
import itertools
from ...core.parameterization import Parameterized
from kern import Kern
from ...util.caching import Cache_this
from kern import CombinationKernel
class Add(Kern):
def __init__(self, subkerns, tensor):
assert all([isinstance(k, Kern) for k in subkerns])
if tensor:
input_dim = sum([k.input_dim for k in subkerns])
self.input_slices = []
n = 0
for k in subkerns:
self.input_slices.append(slice(n, n+k.input_dim))
n += k.input_dim
else:
assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
input_dim = subkerns[0].input_dim
self.input_slices = [slice(None) for k in subkerns]
super(Add, self).__init__(input_dim, 'add')
self.add_parameters(*subkerns)
class Add(CombinationKernel):
"""
Add given list of kernels together.
propagates gradients thorugh.
"""
def __init__(self, subkerns, name='add'):
super(Add, self).__init__(subkerns, name)
def K(self, X, X2=None):
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed throgh to the 'part' object, which
handLes this as X2 == X.
Add all kernels together.
If a list of parts (of this kernel!) `which_parts` is given, only
the parts of the list are taken to compute the covariance.
"""
assert X.shape[1] == self.input_dim
if X2 is None:
return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
else:
return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
[p.update_gradients_full(dL_dK, X[:,i_s], X2) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[p.update_gradients_full(dL_dK, X[:,i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dKdiag, X):
[p.update_gradients_diag(dL_dKdiag, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
@ -58,27 +55,19 @@ class Add(Kern):
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros_like(X)
if X2 is None:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
else:
[np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
target = np.zeros(X.shape)
[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def Kdiag(self, X):
assert X.shape[1] == self.input_dim
return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
def psi0(self, Z, variational_posterior):
return np.sum([p.psi0(Z[:, i_s], variational_posterior[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)],0)
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
def psi1(self, Z, variational_posterior):
return np.sum([p.psi1(Z[:, i_s], variational_posterior[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
def psi2(self, Z, variational_posterior):
psi2 = np.sum([p.psi2(Z[:, i_s], variational_posterior[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
@ -86,54 +75,52 @@ class Add(Kern):
from linear import Linear
#ffrom fixed import Fixed
for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn;t combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z[:,i2], variational_posterior[:, i_s])
tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z[:,i1], variational_posterior[:, i_s])
tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
mu, S = variational_posterior.mean, variational_posterior.variance
for p1, is1 in zip(self._parameters_, self.input_slices):
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], variational_posterior[:, is1]) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z[:,is1], variational_posterior[:, is1])
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
for p1 in self.parts:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
@ -141,22 +128,18 @@ class Add(Kern):
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], variational_posterior[:, is2]) * 2.
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z[:,is1], variational_posterior[:, is1])
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_mu = np.zeros(variational_posterior.shape)
target_S = np.zeros(variational_posterior.shape)
for p1, is1 in zip(self._parameters_, self.input_slices):
for p1 in self._parameters_:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2, is2 in zip(self._parameters_, self.input_slices):
for p2 in self._parameters_:
if p2 is p1:
continue
if isinstance(p2, White):
@ -164,35 +147,20 @@ class Add(Kern):
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], variational_posterior[:, is2]) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z[:,is1], variational_posterior[:, is1])
target_mu += a
target_S += b
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu[:, p1.active_dims] += a
target_S[:, p1.active_dims] += b
return target_mu, target_S
def input_sensitivity(self):
in_sen = np.zeros((self.num_params, self.input_dim))
for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)):
in_sen[i, i_s] = p.input_sensitivity()
return in_sen
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return Parameterized._getstate(self) + [#self._parameters_,
self.input_dim,
self.input_slices,
self._param_slices_
]
return super(Add, self)._getstate()
def _setstate(self, state):
self._param_slices_ = state.pop()
self.input_slices = state.pop()
self.input_dim = state.pop()
Parameterized._setstate(self, state)
super(Add, self)._setstate(state)

View file

@ -34,8 +34,8 @@ class Coregionalize(Kern):
.. note: see coregionalization examples in GPy.examples.regression for some usage.
"""
def __init__(self, output_dim, rank=1, W=None, kappa=None, name='coregion'):
super(Coregionalize, self).__init__(input_dim=1, name=name)
def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, name='coregion'):
super(Coregionalize, self).__init__(input_dim, name=name)
self.output_dim = output_dim
self.rank = rank
if self.rank>output_dim:

View file

@ -40,24 +40,26 @@ class IndependentOutputs(Kern):
the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
"""
def __init__(self, kern, name='independ'):
super(IndependentOutputs, self).__init__(kern.input_dim+1, name)
def __init__(self, active_dim, kern, name='independ'):
assert isinstance(active_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indeces"
super(IndependentOutputs, self).__init__(np.r_[0:max(max(kern.active_dims)+1, active_dim+1)], name)
self.index_dim = active_dim
self.kern = kern
self.add_parameters(self.kern)
def K(self,X ,X2=None):
X, slices = X[:,:-1], index_to_slices(X[:,-1])
slices = index_to_slices(X[:,self.index_dim])
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[[np.copyto(target[s,s], self.kern.K(X[s], None)) for s in slices_i] for slices_i in slices]
[[np.copyto(target[s,s], self.kern.K(X[s,:], None)) for s in slices_i] for slices_i in slices]
else:
X2, slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
[[[np.copyto(target[s, s2], self.kern.K(X[s],X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
[[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
return target
def Kdiag(self,X):
X, slices = X[:,:-1], index_to_slices(X[:,-1])
slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape[0])
[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices]
return target
@ -66,20 +68,19 @@ class IndependentOutputs(Kern):
target = np.zeros(self.kern.size)
def collate_grads(dL, X, X2):
self.kern.update_gradients_full(dL,X,X2)
self.kern._collect_gradient(target)
target += self.kern.gradient
X,slices = X[:,:-1],index_to_slices(X[:,-1])
slices = index_to_slices(X[:,self.index_dim])
if X2 is None:
[[collate_grads(dL_dK[s,s], X[s], None) for s in slices_i] for slices_i in slices]
else:
X2, slices2 = X2[:,:-1], index_to_slices(X2[:,-1])
slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
self.kern._set_gradient(target)
self.kern.gradient = target
def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros_like(X)
X, slices = X[:,:-1],index_to_slices(X[:,-1])
slices = index_to_slices(X[:,self.index_dim])
if X2 is None:
[[np.copyto(target[s,:-1], self.kern.gradients_X(dL_dK[s,s],X[s],None)) for s in slices_i] for slices_i in slices]
else:
@ -88,7 +89,7 @@ class IndependentOutputs(Kern):
return target
def gradients_X_diag(self, dL_dKdiag, X):
X, slices = X[:,:-1], index_to_slices(X[:,-1])
slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape)
[[np.copyto(target[s,:-1], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices]
return target
@ -97,10 +98,10 @@ class IndependentOutputs(Kern):
target = np.zeros(self.kern.size)
def collate_grads(dL, X):
self.kern.update_gradients_diag(dL,X)
self.kern._collect_gradient(target)
self.target += self.kern.gradient
X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices]
self.kern._set_gradient(target)
self.kern.gradient = target
class Hierarchical(Kern):
"""

View file

@ -3,12 +3,19 @@
import sys
import numpy as np
import itertools
from ...core.parameterization import Parameterized
from ...core.parameterization.param import Param
from ...core.parameterization.parameterized import Parameterized
from kernel_slice_operations import KernCallsViaSlicerMeta
from ...util.caching import Cache_this
class Kern(Parameterized):
#===========================================================================
# This adds input slice support. The rather ugly code for slicing can be
# found in kernel_slice_operations
__metaclass__ = KernCallsViaSlicerMeta
#===========================================================================
_debug=False
def __init__(self, input_dim, name, *a, **kw):
"""
The base class for a kernel: a positive definite function
@ -20,11 +27,29 @@ class Kern(Parameterized):
Do not instantiate.
"""
super(Kern, self).__init__(name=name, *a, **kw)
self.input_dim = input_dim
if isinstance(input_dim, int):
self.active_dims = np.r_[0:input_dim]
self.input_dim = input_dim
else:
self.active_dims = np.r_[input_dim]
self.input_dim = len(self.active_dims)
self._sliced_X = 0
@Cache_this(limit=10)#, ignore_args = (0,))
def _slice_X(self, X):
return X[:, self.active_dims]
def K(self, X, X2):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed throgh to the 'part' object, which
handLes this as X2 == X.
"""
raise NotImplementedError
def Kdiag(self, Xa):
def Kdiag(self, X):
raise NotImplementedError
def psi0(self, Z, variational_posterior):
raise NotImplementedError
@ -34,7 +59,7 @@ class Kern(Parameterized):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError
def gradients_X_diag(self, dL_dK, X):
def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError
def update_gradients_diag(self, dL_dKdiag, X):
@ -44,7 +69,9 @@ class Kern(Parameterized):
def update_gradients_full(self, dL_dK, X, X2):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_diag(self, dL_dKdiag, X):
"""Set the gradients for all parameters for the derivative of the diagonal of the covariance w.r.t the kernel parameters."""
raise NotImplementedError
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Set the gradients of all parameters when doing inference with
@ -99,17 +126,10 @@ class Kern(Parameterized):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def add(self, other, tensor=False):
def add(self, other, name='add'):
"""
Add another kernel to this one.
If Tensor is False, both kernels are defined on the same _space_. then
the created kernel will have the same number of inputs as self and
other (which must be the same).
If Tensor is True, then the dimensions are stacked 'horizontally', so
that the resulting kernel has self.input_dim + other.input_dim
:param other: the other kernel to be added
:type other: GPy.kern
@ -117,11 +137,11 @@ class Kern(Parameterized):
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add
kernels = []
if not tensor and isinstance(self, Add): kernels.extend(self._parameters_)
if isinstance(self, Add): kernels.extend(self._parameters_)
else: kernels.append(self)
if not tensor and isinstance(other, Add): kernels.extend(other._parameters_)
if isinstance(other, Add): kernels.extend(other._parameters_)
else: kernels.append(other)
return Add(kernels, tensor)
return Add(kernels, name=name)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
@ -131,9 +151,12 @@ class Kern(Parameterized):
"""
Shortcut for tensor `prod`.
"""
return self.prod(other, tensor=True)
assert self.active_dims == range(self.input_dim), "Can only use kernels, which have their input_dims defined from 0"
assert other.active_dims == range(other.input_dim), "Can only use kernels, which have their input_dims defined from 0"
other.active_dims += self.input_dim
return self.prod(other)
def prod(self, other, tensor=False, name=None):
def prod(self, other, name='mul'):
"""
Multiply two kernels (either on the same space, or on the tensor
product of the input space).
@ -146,4 +169,45 @@ class Kern(Parameterized):
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from prod import Prod
return Prod(self, other, tensor, name)
#kernels = []
#if isinstance(self, Prod): kernels.extend(self._parameters_)
#else: kernels.append(self)
#if isinstance(other, Prod): kernels.extend(other._parameters_)
#else: kernels.append(other)
return Prod([self, other], name)
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return super(Kern, self)._getstate() + [
self.active_dims,
self.input_dim,
self._sliced_X]
def _setstate(self, state):
self._sliced_X = state.pop()
self.input_dim = state.pop()
self.active_dims = state.pop()
super(Kern, self)._setstate(state)
class CombinationKernel(Kern):
def __init__(self, kernels, name):
assert all([isinstance(k, Kern) for k in kernels])
# make sure the active dimensions of all underlying kernels are covered:
ma = reduce(lambda a,b: max(a, max(b)), (x.active_dims for x in kernels), 0)
input_dim = np.r_[0:ma+1]
# initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, name)
self.add_parameters(*kernels)
@property
def parts(self):
return self._parameters_
def input_sensitivity(self):
in_sen = np.zeros((self.num_params, self.input_dim))
for i, p in enumerate(self.parts):
in_sen[i, p.active_dims] = p.input_sensitivity()
return in_sen

View file

@ -0,0 +1,108 @@
'''
Created on 11 Mar 2014
@author: maxz
'''
from ...core.parameterization.parameterized import ParametersChangedMeta
class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __call__(self, *args, **kw):
instance = super(ParametersChangedMeta, self).__call__(*args, **kw)
instance.K = _slice_wrapper(instance, instance.K)
instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True)
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True)
instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True)
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True)
instance.parameters_changed()
return instance
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False):
"""
This method wraps the functions in kernel to make sure all kernels allways see their respective input dimension.
The different switches are:
diag: if X2 exists
derivative: if first arg is dL_dK
psi_stat: if first 3 args are dL_dpsi0..2
psi_stat_Z: if first 2 args are dL_dpsi1..2
"""
if derivative:
if diag:
def x_slice_wrapper(dL_dK, X):
X = kern._slice_X(X) if not kern._sliced_X else X
kern._sliced_X += 1
try:
ret = operation(dL_dK, X)
except:
raise
finally:
kern._sliced_X -= 1
return ret
elif psi_stat:
def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1
try:
ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except:
raise
finally:
kern._sliced_X -= 1
return ret
elif psi_stat_Z:
def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1
try:
ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except:
raise
finally:
kern._sliced_X -= 1
return ret
else:
def x_slice_wrapper(dL_dK, X, X2=None):
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
kern._sliced_X += 1
try:
ret = operation(dL_dK, X, X2)
except:
raise
finally:
kern._sliced_X -= 1
return ret
else:
if diag:
def x_slice_wrapper(X, *args, **kw):
X = kern._slice_X(X) if not kern._sliced_X else X
kern._sliced_X += 1
try:
ret = operation(X, *args, **kw)
except:
raise
finally:
kern._sliced_X -= 1
return ret
else:
def x_slice_wrapper(X, X2=None, *args, **kw):
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
kern._sliced_X += 1
try:
ret = operation(X, X2, *args, **kw)
except: raise
finally:
kern._sliced_X -= 1
return ret
x_slice_wrapper._operation = operation
x_slice_wrapper.__name__ = ("slicer("+operation.__name__
+(","+str(bool(diag)) if diag else'')
+(','+str(bool(derivative)) if derivative else '')
+')')
x_slice_wrapper.__doc__ = "**sliced**\n" + (operation.__doc__ or "")
return x_slice_wrapper

View file

@ -147,7 +147,6 @@ class Linear(Kern):
mu = variational_posterior.mean
S = variational_posterior.variance
mu2S = np.square(mu)+S
_dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\
np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance)
@ -175,7 +174,7 @@ class Linear(Kern):
mu = variational_posterior.mean
S = variational_posterior.variance
_, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)

View file

@ -96,12 +96,12 @@ class MLP(Kern):
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
def gradients_X_diag(self, dL_dKdiag, X):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_asin_arg
denom = self._K_diag_denom
numer = self._K_diag_numer
#numer = self._K_diag_numer
return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]

View file

@ -85,8 +85,9 @@ class PeriodicExponential(Periodic):
self.b = [1]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))[:,0]
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
@ -100,7 +101,6 @@ class PeriodicExponential(Periodic):
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))
#@silence_errors
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
if X2 is None: X2 = X
@ -194,8 +194,9 @@ class PeriodicMatern32(Periodic):
self.b = [1,self.lengthscale**2/3]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
@ -212,8 +213,8 @@ class PeriodicMatern32(Periodic):
return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
@silence_errors
def update_gradients_full(self,dL_dK,X,X2,target):
#@silence_errors
def update_gradients_full(self,dL_dK,X,X2):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -307,8 +308,9 @@ class PeriodicMatern52(Periodic):
self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)]
self.basis_alpha = np.ones((2*self.n_freq,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)

View file

@ -1,10 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
from kern import CombinationKernel
from ...util.caching import Cache_this
import itertools
class Prod(Kern):
class Prod(CombinationKernel):
"""
Computes the product of 2 kernels
@ -15,49 +17,49 @@ class Prod(Kern):
:rtype: kernel object
"""
def __init__(self, k1, k2, tensor=False,name=None):
if tensor:
name = k1.name + '_xx_' + k2.name if name is None else name
super(Prod, self).__init__(k1.input_dim + k2.input_dim, name)
self.slice1 = slice(0,k1.input_dim)
self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
else:
assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
name = k1.name + '_x_' + k2.name if name is None else name
super(Prod, self).__init__(k1.input_dim, name)
self.slice1 = slice(0, self.input_dim)
self.slice2 = slice(0, self.input_dim)
self.k1 = k1
self.k2 = k2
self.add_parameters(self.k1, self.k2)
def __init__(self, kernels, name='mul'):
assert len(kernels) == 2, 'only implemented for two kernels as of yet'
super(Prod, self).__init__(kernels, name)
def K(self, X, X2=None):
if X2 is None:
return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
else:
return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.multiply, (p.K(X, X2) for p in which_parts))
def Kdiag(self, X):
return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X):
self.k1.update_gradients_full(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1])
self.k2.update_gradients_full(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2])
def update_gradients_full(self, dL_dK, X, X2=None):
for k1,k2 in itertools.combinations(self.parts, 2):
k1.update_gradients_full(dL_dK*k2.K(X, X2), X, X2)
k2.update_gradients_full(dL_dK*k1.K(X, X2), X, X2)
def update_gradients_diag(self, dL_dKdiag, X):
for k1,k2 in itertools.combinations(self.parts, 2):
k1.update_gradients_diag(dL_dKdiag*k2.Kdiag(X), X)
k2.update_gradients_diag(dL_dKdiag*k1.Kdiag(X), X)
def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)
if X2 is None:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2.K(X[:,self.slice2]), X[:,self.slice1], None)
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1.K(X[:,self.slice1]), X[:,self.slice2], None)
else:
target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2.K(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1.K(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
for k1,k2 in itertools.combinations(self.parts, 2):
target[:,k1.active_dims] += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2)
target[:,k2.active_dims] += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2)
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
target[:,self.slice1] = self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1])
target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2])
for k1,k2 in itertools.combinations(self.parts, 2):
target[:,k1.active_dims] += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
target[:,k2.active_dims] += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
return target

View file

@ -19,7 +19,6 @@ class RBF(Stationary):
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, name='rbf'):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, name)
self.weave_options = {}
@ -56,31 +55,33 @@ class RBF(Stationary):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
if self.ARD:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
#from psi2
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
if self.ARD:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
l2 = self.lengthscale**2
if l2.size != self.input_dim:
l2 = l2*np.ones(self.input_dim)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
if self._debug:
num_grad = self.lengthscale.gradient.copy()
self.lengthscale.gradient = 0.
#from psi1
@ -92,16 +93,16 @@ class RBF(Stationary):
else:
self.lengthscale.gradient += dpsi1_dlength.sum()
self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
#from psi2
S = variational_posterior.variance
_, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
if not self.ARD:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2).sum()
else:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
if self._debug:
import ipdb;ipdb.set_trace()
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
else:
@ -112,17 +113,16 @@ class RBF(Stationary):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#psi2
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
return grad
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
#psi1
@ -145,23 +145,24 @@ class RBF(Stationary):
# Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
ndata = variational_posterior.mean.shape[0]
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#psi2
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
return grad_mu, grad_S, grad_gamma
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
#psi1
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)

View file

@ -89,3 +89,31 @@ class Bias(Static):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
class Fixed(Static):
def __init__(self, input_dim, covariance_matrix, variance=1., name='fixed'):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
super(Bias, self).__init__(input_dim, variance, name)
self.fixed_K = covariance_matrix
def K(self, X, X2):
return self.variance * self.fixed_K
def Kdiag(self, X):
return self.variance * self.fixed_K.diag()
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K)
def psi2(self, Z, variational_posterior):
return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum()

View file

@ -57,7 +57,7 @@ class Stationary(Kern):
if lengthscale.size != input_dim:
lengthscale = np.ones(input_dim)*lengthscale
else:
lengthscale = np.ones(self.input_dim)
lengthscale = np.ones(self.input_dim)
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1
@ -85,12 +85,14 @@ class Stationary(Kern):
Compute the Euclidean distance between each row of X and X2, or between
each pair of rows of X if X2 is None.
"""
#X, = self._slice_X(X)
if X2 is None:
Xsq = np.sum(np.square(X),1)
r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
util.diag.view(r2)[:,]= 0. # force diagnoal to be zero: sometime numerically a little negative
return np.sqrt(r2)
else:
#X2, = self._slice_X(X2)
X1sq = np.sum(np.square(X),1)
X2sq = np.sum(np.square(X2),1)
return np.sqrt(-2.*np.dot(X, X2.T) + (X1sq[:,None] + X2sq[None,:]))
@ -124,7 +126,6 @@ class Stationary(Kern):
self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
#now the lengthscale gradient(s)
@ -136,7 +137,7 @@ class Stationary(Kern):
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(self._slice_X(X)[:,q:q+1] - self._slice_X(X2)[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@ -176,7 +177,6 @@ class Stationary(Kern):
ret = np.empty(X.shape, dtype=np.float64)
[np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
ret /= self.lengthscale**2
return ret
def gradients_X_diag(self, dL_dKdiag, X):

View file

@ -5,3 +5,4 @@ from gamma import Gamma
from poisson import Poisson
from student_t import StudentT
from likelihood import Likelihood
from mixed_noise import MixedNoise

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#TODO
"""
A lot of this code assumes that the link function is the identity.
A lot of this code assumes that the link function is the identity.
I think laplace code is okay, but I'm quite sure that the EP moments will only work if the link is identity.
@ -49,7 +49,7 @@ class Gaussian(Likelihood):
if isinstance(gp_link, link_functions.Identity):
self.log_concave = True
def covariance_matrix(self, Y, Y_metadata=None):
def covariance_matrix(self, Y, **Y_metadata):
return np.eye(Y.shape[0]) * self.variance
def update_gradients(self, partial):

View file

@ -0,0 +1,58 @@
import numpy as np
from scipy import stats, special
from GPy.util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions
from likelihood import Likelihood
from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp
from ..core.parameterization import Parameterized
import itertools
class MixedNoise(Likelihood):
def __init__(self, likelihoods_list, noise_index, variance = None, name='mixed_noise'):
Nlike = len(likelihoods_list)
self.order = np.unique(noise_index)
assert self.order.size == Nlike
if variance is None:
variance = np.ones(Nlike)
else:
assert variance.size == Nlike
super(Likelihood, self).__init__(name=name)
self.add_parameters(*likelihoods_list)
self.likelihoods_list = likelihoods_list
self.noise_index = noise_index
self.log_concave = False
self.likelihoods_indices = [noise_index.flatten()==j for j in self.order]
def covariance_matrix(self, Y, noise_index, **Y_metadata):
variance = np.zeros(Y.shape[0])
for lik, ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices):
variance[ind] = lik.variance
return np.diag(variance)
def update_gradients(self, partial, noise_index, **Y_metadata):
[lik.update_gradients(partial[ind]) for lik,ind in itertools.izip(self.likelihoods_list, self.likelihoods_indices)]
def predictive_values(self, mu, var, full_cov=False, noise_index=None, **Y_metadata):
_variance = np.array([ self.likelihoods_list[j].variance for j in noise_index ])
if full_cov:
var += np.eye(var.shape[0])*_variance
d = 2*np.sqrt(np.diag(var))
low, up = mu - d, mu + d
else:
var += _variance
d = 2*np.sqrt(var)
low, up = mu - d, mu + d
return mu, var, low, up
def predictive_variance(self, mu, sigma, noise_index, predictive_mean=None, **Y_metadata):
if isinstance(noise_index,int):
_variance = self.variance[noise_index]
else:
_variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
return _variance + sigma**2

View file

@ -13,6 +13,6 @@ from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM
from mrd import MRD
from gradient_checker import GradientChecker
from gp_multioutput_regression import GPMultioutputRegression
from sparse_gp_multioutput_regression import SparseGPMultioutputRegression
from ss_gplvm import SSGPLVM
from ss_gplvm import SSGPLVM
from gp_coregionalized_regression import GPCoregionalizedRegression
#.py file not included!!! #from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression

View file

@ -0,0 +1,44 @@
# Copyright (c) 2012 - 2014 the GPy Austhors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import GP
from .. import likelihoods
from .. import kern
from .. import util
class GPCoregionalizedRegression(GP):
"""
Gaussian Process model for heteroscedastic multioutput regression
This is a thin wrapper around the models.GP class, with a set of sensible defaults
:param X_list: list of input observations corresponding to each output
:type X_list: list of numpy arrays
:param Y_list: list of observed values related to the different noise models
:type Y_list: list of numpy arrays
:param kernel: a GPy kernel, defaults to RBF ** Coregionalized
:type kernel: None | GPy.kernel defaults
:likelihoods_list: a list of likelihoods, defaults to list of Gaussian likelihoods
:type likelihoods_list: None | a list GPy.likelihoods
:param name: model name
:type name: string
:param W_rank: number tuples of the corregionalization parameters 'W' (see coregionalize kernel documentation)
:type W_rank: integer
:param kernel_name: name of the kernel
:type kernel_name: string
"""
def __init__(self, X_list, Y_list, kernel=None, likelihoods_list=None, name='GPCR',W_rank=1,kernel_name='X'):
#Input and Output
X,Y,self.noise_index = util.multioutput.build_XY(X_list,Y_list)
Ny = len(Y_list)
#Kernel
if kernel is None:
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name)
#Likelihood
likelihood = util.multioutput.build_likelihood(Y_list,self.noise_index,likelihoods_list)
super(GPCoregionalizedRegression, self).__init__(X,Y,kernel,likelihood, noise_index=self.noise_index)

View file

@ -41,7 +41,7 @@ class GPLVM(GP):
def parameters_changed(self):
super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self.dL_dK, self.X, None)
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
def _getstate(self):
return GP._getstate(self)

View file

@ -15,13 +15,13 @@ from ..likelihoods import Gaussian
class MRD(Model):
"""
Apply MRD to all given datasets Y in Ylist.
Apply MRD to all given datasets Y in Ylist.
Y_i in [n x p_i]
The samples n in the datasets need
The samples n in the datasets need
to match up, whereas the dimensionality p_d can differ.
:param [array-like] Ylist: List of datasets to apply MRD on
:param input_dim: latent dimensionality
:type input_dim: int
@ -45,13 +45,12 @@ class MRD(Model):
:param str name: the name of this model
:param [str] Ynames: the names for the datasets given, must be of equal length as Ylist or None
"""
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
initx = 'PCA', initz = 'permute',
num_inducing=10, Z=None, kernel=None,
num_inducing=10, Z=None, kernel=None,
inference_method=None, likelihood=None, name='mrd', Ynames=None):
super(MRD, self).__init__(name)
# sort out the kernels
if kernel is None:
from ..kern import RBF
@ -64,23 +63,23 @@ class MRD(Model):
self.kern = kernel
self.input_dim = input_dim
self.num_inducing = num_inducing
self.Ylist = Ylist
self._in_init_ = True
X = self._init_X(initx, Ylist)
self.Z = Param('inducing inputs', self._init_Z(initz, X))
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
if X_variance is None:
X_variance = np.random.uniform(0, .2, X.shape)
self.variational_prior = NormalPrior()
self.X = NormalPosterior(X, X_variance)
if likelihood is None:
self.likelihood = [Gaussian(name='Gaussian_noise'.format(i)) for i in range(len(Ylist))]
else: self.likelihood = likelihood
if inference_method is None:
self.inference_method= []
for y in Ylist:
@ -91,12 +90,12 @@ class MRD(Model):
else:
self.inference_method = inference_method
self.inference_method.set_limit(len(Ylist))
self.add_parameters(self.X, self.Z)
if Ynames is None:
Ynames = ['Y{}'.format(i) for i in range(len(Ylist))]
for i, n, k, l in itertools.izip(itertools.count(), Ynames, self.kern, self.likelihood):
p = Parameterized(name=n)
p.add_parameter(k)
@ -104,23 +103,23 @@ class MRD(Model):
setattr(self, 'Y{}'.format(i), p)
self.add_parameter(p)
self._in_init_ = False
def parameters_changed(self):
self._log_marginal_likelihood = 0
self.posteriors = []
self.Z.gradient = 0.
self.X.mean.gradient = 0.
self.X.variance.gradient = 0.
for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
self.posteriors.append(posterior)
self._log_marginal_likelihood += lml
# likelihood gradients
l.update_gradients(grad_dict.pop('partial_for_likelihood'))
#gradients wrt kernel
dL_dKmm = grad_dict.pop('dL_dKmm')
k.update_gradients_full(dL_dKmm, self.Z, None)
@ -132,7 +131,7 @@ class MRD(Model):
self.Z.gradient += k.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += k.gradients_Z_expectations(
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
dL_dmean, dL_dS = k.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **grad_dict)
self.X.mean.gradient += dL_dmean
self.X.variance.gradient += dL_dS

View file

@ -45,10 +45,10 @@ class SparseGPRegression(SparseGP):
assert Z.shape[1] == input_dim
likelihood = likelihoods.Gaussian()
if not (X_variance is None):
X = NormalPosterior(X,X_variance)
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC())
def _getstate(self):

View file

@ -56,8 +56,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean
X_variance = param_to_array(model.X.variance)
else:
@ -68,7 +68,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(model.input_dim),fixed_dims)
plots = {}
#one dimensional plotting
if len(free_dims) == 1:
@ -86,23 +86,30 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
upper = m + 2*np.sqrt(v)
Y = Y
else:
m, v, lower, upper = model.predict(Xgrid)
if 'noise_index' in model.Y_metadata.keys():
if np.unique(model.Y_metadata['noise_index'][which_data_rows]).size > 1:
print "Data slices choosen have different noise models. Just one will be used."
noise_index = np.repeat(model.Y_metadata['noise_index'][which_data_rows][0], Xgrid.shape[0])[:,None]
m, v, lower, upper = model.predict(Xgrid,full_cov=False,noise_index=noise_index)
else:
noise_index = None
m, v, lower, upper = model.predict(Xgrid,full_cov=False)
Y = Y
for d in which_data_ycols:
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
plots['gpplot'] = gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], ax=ax, edgecol=linecol, fillcol=fillcol)
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = model.posterior_samples(Xgrid, samples)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
#add error bars for uncertain (if input uncertainty is being modelled)
if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
@ -118,7 +125,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
Zu = Z[:,free_dims]
z_height = ax.get_ylim()[0]
ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)
@ -143,8 +150,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
Y = Y
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
plots['contour'] = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
plots['dataplot'] = ax.scatter(X[which_data_rows, free_dims[0]], X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0])
@ -157,11 +164,11 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
if hasattr(model,"Z"):
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
Zu = Z[:,free_dims]
ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
plots['inducing_inputs'] = ax.plot(Zu[:,free_dims[0]], Zu[:,free_dims[1]], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
return plots
def plot_fit_f(model, *args, **kwargs):
"""

View file

@ -1,85 +0,0 @@
# Copyright (c) 2012, Nicolo Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
from ..models import BayesianGPLVM
class BGPLVMTests(unittest.TestCase):
def test_bias_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_linear_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.Linear(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.RBF(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_bias_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_line_kern(self):
N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim)
k = GPy.kern.RBF(input_dim) + GPy.kern.Linear(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.RBF(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
def test_linear_bias_kern(self):
N, num_inducing, input_dim, D = 30, 5, 4, 30
X = np.random.rand(N, input_dim)
k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0)
k = GPy.kern.Linear(input_dim) + GPy.kern.Bias(input_dim) + GPy.kern.White(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -6,7 +6,9 @@ import numpy as np
import GPy
import sys
verbose = True
verbose = 0
class Kern_check_model(GPy.core.Model):
"""
@ -31,9 +33,10 @@ class Kern_check_model(GPy.core.Model):
self.X2 = X2
self.dL_dK = dL_dK
def is_positive_definite(self):
def is_positive_semi_definite(self):
v = np.linalg.eig(self.kernel.K(self.X))[0]
if any(v<-10*sys.float_info.epsilon):
if any(v.real<=-1e-10):
print v.real.min()
return False
else:
return True
@ -87,11 +90,11 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
return (np.diag(self.dL_dK)*self.kernel.Kdiag(self.X)).sum()
def parameters_changed(self):
self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK, self.X)
self.X.gradient = self.kernel.gradients_X_diag(self.dL_dK.diagonal(), self.X)
def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False):
"""
This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite
@ -117,7 +120,7 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
if verbose:
print("Checking covariance function is positive definite.")
result = Kern_check_model(kern, X=X).is_positive_definite()
result = Kern_check_model(kern, X=X).is_positive_semi_definite()
if result and verbose:
print("Check passed.")
if not result:
@ -210,25 +213,90 @@ def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
class KernelTestsContinuous(unittest.TestCase):
class KernelGradientTestsContinuous(unittest.TestCase):
def setUp(self):
self.X = np.random.randn(100,2)
self.X2 = np.random.randn(110,2)
self.N, self.D = 100, 5
self.X = np.random.randn(self.N,self.D)
self.X2 = np.random.randn(self.N+10,self.D)
continuous_kerns = ['RBF', 'Linear']
self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
def test_Matern32(self):
k = GPy.kern.Matern32(2)
self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose))
k = GPy.kern.Matern32(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Prod(self):
k = GPy.kern.Matern32([2,3]) * GPy.kern.RBF([0,4]) + GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Add(self):
k = GPy.kern.Matern32([2,3]) + GPy.kern.RBF([0,4]) + GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Matern52(self):
k = GPy.kern.Matern52(2)
self.assertTrue(kern_test(k, X=self.X, X2=self.X2, verbose=verbose))
k = GPy.kern.Matern52(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#TODO: turn off grad checkingwrt X for indexed kernels liek coregionalize
def test_RBF(self):
k = GPy.kern.RBF(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Linear(self):
k = GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#TODO: turn off grad checkingwrt X for indexed kernels liek coregionalize
# class KernelGradientTestsContinuous1D(unittest.TestCase):
# def setUp(self):
# self.N, self.D = 100, 1
# self.X = np.random.randn(self.N,self.D)
# self.X2 = np.random.randn(self.N+10,self.D)
#
# continuous_kerns = ['RBF', 'Linear']
# self.kernclasses = [getattr(GPy.kern, s) for s in continuous_kerns]
#
# def test_PeriodicExponential(self):
# k = GPy.kern.PeriodicExponential(self.D)
# k.randomize()
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#
# def test_PeriodicMatern32(self):
# k = GPy.kern.PeriodicMatern32(self.D)
# k.randomize()
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
#
# def test_PeriodicMatern52(self):
# k = GPy.kern.PeriodicMatern52(self.D)
# k.randomize()
# self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
class KernelTestsMiscellaneous(unittest.TestCase):
def setUp(self):
N, D = 100, 10
self.X = np.linspace(-np.pi, +np.pi, N)[:,None] * np.ones(D)
self.rbf = GPy.kern.RBF(range(2))
self.linear = GPy.kern.Linear((3,6))
self.matern = GPy.kern.Matern32(np.array([2,4,7]))
self.sumkern = self.rbf + self.linear
self.sumkern += self.matern
self.sumkern.randomize()
def test_active_dims(self):
self.assertListEqual(self.sumkern.active_dims.tolist(), range(8))
def test_which_parts(self):
self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.matern]), self.linear.K(self.X)+self.matern.K(self.X)))
self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=[self.linear, self.rbf]), self.linear.K(self.X)+self.rbf.K(self.X)))
self.assertTrue(np.allclose(self.sumkern.K(self.X, which_parts=self.sumkern.parts[0]), self.rbf.K(self.X)))
if __name__ == "__main__":

View file

@ -541,7 +541,8 @@ class TestNoiseModels(object):
#import ipdb; ipdb.set_trace()
#NOTE this test appears to be stochastic for some likelihoods (student t?)
# appears to all be working in test mode right now...
#if isinstance(model, GPy.likelihoods.StudentT):
# import ipdb;ipdb.set_trace()
assert m.checkgrad(step=step)
###########
@ -651,7 +652,7 @@ class LaplaceTests(unittest.TestCase):
m2['.*white'].constrain_fixed(1e-6)
m2['.*rbf.variance'].constrain_bounded(1e-4, 10)
m2.randomize()
if debug:
print m1
print m2
@ -663,7 +664,7 @@ class LaplaceTests(unittest.TestCase):
if debug:
print m1
print m2
m2[:] = m1[:]
#Predict for training points to get posterior mean and variance
@ -700,9 +701,8 @@ class LaplaceTests(unittest.TestCase):
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
#Check marginals are the same with random
m1.randomize()
import ipdb;ipdb.set_trace()
m2[:] = m1[:]
np.testing.assert_almost_equal(m1.log_likelihood(), m2.log_likelihood(), decimal=2)
#Check they are checkgradding

View file

@ -1,32 +0,0 @@
# Copyright (c) 2013, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
'''
Created on 10 Apr 2013
@author: maxz
'''
import unittest
import numpy as np
import GPy
class MRDTests(unittest.TestCase):
def test_gradients(self):
num_m = 3
N, num_inducing, input_dim, D = 20, 8, 6, 20
X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
K = k.K(X)
Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)]
likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing)
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -16,21 +16,21 @@ class Test(unittest.TestCase):
from GPy.core.parameterization import Param
from GPy.core.parameterization.transformations import Logistic
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
self.test1 = GPy.core.Parameterized("test model")
self.test1.add_parameter(self.white)
self.test1.add_parameter(self.rbf, 0)
self.test1.add_parameter(self.param)
x = np.linspace(-2,6,4)[:,None]
y = np.sin(x)
self.testmodel = GPy.models.GPRegression(x,y)
def test_add_parameter(self):
self.assertEquals(self.rbf._parent_index_, 0)
self.assertEquals(self.white._parent_index_, 1)
pass
def test_fixes(self):
self.white.fix(warning=False)
self.test1.remove_parameter(self.test1.param)
@ -41,18 +41,18 @@ class Test(unittest.TestCase):
self.test1.add_parameter(self.white, 0)
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
def test_remove_parameter(self):
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
self.white.fix()
self.test1.remove_parameter(self.white)
self.assertIs(self.test1._fixes_,None)
self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
self.assertEquals(self.white.constraints._offset, 0)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
self.test1.add_parameter(self.white, 0)
self.assertIs(self.test1.constraints, self.white.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
@ -60,17 +60,17 @@ class Test(unittest.TestCase):
self.assertListEqual(self.test1.constraints[__fixed__].tolist(), [0])
self.assertIs(self.white._fixes_,None)
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED] + [UNFIXED] * 52)
self.test1.remove_parameter(self.white)
self.assertIs(self.test1._fixes_,None)
self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
def test_add_parameter_already_in_hirarchy(self):
self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
def test_default_constraints(self):
self.assertIs(self.rbf.variance.constraints._param_index_ops, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
@ -83,7 +83,7 @@ class Test(unittest.TestCase):
self.rbf.constrain(GPy.transformations.Square(), False)
self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(2))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [2])
self.test1.remove_parameter(self.rbf)
self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), [])

View file

@ -12,6 +12,7 @@ import numpy
from GPy.kern import RBF
from GPy.kern import Linear
from copy import deepcopy
from GPy.core.parameterization.variational import NormalPosterior
__test__ = lambda: 'deep' in sys.argv
# np.random.seed(0)
@ -28,53 +29,21 @@ def ard(p):
class Test(unittest.TestCase):
input_dim = 9
num_inducing = 13
N = 300
N = 1000
Nsamples = 1e6
def setUp(self):
i_s_dim_list = [2,4,3]
indices = numpy.cumsum(i_s_dim_list).tolist()
input_slices = [slice(a,b) for a,b in zip([None]+indices, indices)]
#input_slices[2] = deepcopy(input_slices[1])
input_slice_kern = GPy.kern.kern(9,
[
RBF(i_s_dim_list[0], np.random.rand(), np.random.rand(i_s_dim_list[0]), ARD=True),
RBF(i_s_dim_list[1], np.random.rand(), np.random.rand(i_s_dim_list[1]), ARD=True),
Linear(i_s_dim_list[2], np.random.rand(i_s_dim_list[2]), ARD=True)
],
input_slices = input_slices
)
self.kerns = (
# input_slice_kern,
# (GPy.kern.rbf(self.input_dim, ARD=True) +
# GPy.kern.linear(self.input_dim, ARD=True) +
# GPy.kern.bias(self.input_dim) +
# GPy.kern.white(self.input_dim)),
(#GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
GPy.kern.Linear(self.input_dim, np.random.rand(self.input_dim), ARD=True)
+GPy.kern.RBF(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
# +GPy.kern.bias(self.input_dim)
# +GPy.kern.white(self.input_dim)),
),
# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) +
# GPy.kern.bias(self.input_dim, np.random.rand())),
# (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
# +GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True)
# #+GPy.kern.bias(self.input_dim, np.random.rand())
# #+GPy.kern.white(self.input_dim, np.random.rand())),
# ),
# GPy.kern.white(self.input_dim, np.random.rand())),
# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True),
# GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True),
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim),
# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim),
# GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim),
#GPy.kern.RBF([0,1,2], ARD=True)+GPy.kern.Bias(self.input_dim)+GPy.kern.White(self.input_dim),
#GPy.kern.RBF(self.input_dim)+GPy.kern.Bias(self.input_dim)+GPy.kern.White(self.input_dim),
#GPy.kern.Linear(self.input_dim) + GPy.kern.Bias(self.input_dim) + GPy.kern.White(self.input_dim),
#GPy.kern.Linear(self.input_dim, ARD=True) + GPy.kern.Bias(self.input_dim) + GPy.kern.White(self.input_dim),
GPy.kern.Linear([1,3,6,7], ARD=True) + GPy.kern.RBF([0,5,8], ARD=True) + GPy.kern.White(self.input_dim),
)
self.q_x_mean = np.random.randn(self.input_dim)
self.q_x_variance = np.exp(np.random.randn(self.input_dim))
self.q_x_mean = np.random.randn(self.input_dim)[None]
self.q_x_variance = np.exp(.5*np.random.randn(self.input_dim))[None]
self.q_x_samples = np.random.randn(self.Nsamples, self.input_dim) * np.sqrt(self.q_x_variance) + self.q_x_mean
self.q_x = NormalPosterior(self.q_x_mean, self.q_x_variance)
self.Z = np.random.randn(self.num_inducing, self.input_dim)
self.q_x_mean.shape = (1, self.input_dim)
self.q_x_variance.shape = (1, self.input_dim)
@ -114,8 +83,9 @@ class Test(unittest.TestCase):
def test_psi2(self):
for kern in self.kerns:
kern.randomize()
Nsamples = int(np.floor(self.Nsamples/self.N))
psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
psi2 = kern.psi2(self.Z, self.q_x)
K_ = np.zeros((self.num_inducing, self.num_inducing))
diffs = []
for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
@ -130,8 +100,8 @@ class Test(unittest.TestCase):
pylab.figure(msg)
pylab.plot(diffs, marker='x', mew=.2)
# print msg, np.allclose(psi2.squeeze(), K_, rtol=1e-1, atol=.1)
self.assertTrue(np.allclose(psi2.squeeze(), K_),
#rtol=1e-1, atol=.1),
self.assertTrue(np.allclose(psi2.squeeze(), K_,
atol=.1, rtol=1),
msg=msg + ": not matching")
# sys.stdout.write(".")
except:

View file

@ -11,6 +11,7 @@ import itertools
from GPy.core import Model
from GPy.core.parameterization.param import Param
from GPy.core.parameterization.transformations import Logexp
from GPy.core.parameterization.variational import NormalPosterior
class PsiStatModel(Model):
def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
@ -18,23 +19,24 @@ class PsiStatModel(Model):
self.which = which
self.X = Param("X", X)
self.X_variance = Param('X_variance', X_variance, Logexp())
self.q = NormalPosterior(self.X, self.X_variance)
self.Z = Param("Z", Z)
self.N, self.input_dim = X.shape
self.num_inducing, input_dim = Z.shape
assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
self.kern = kernel
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
self.add_parameters(self.X, self.X_variance, self.Z, self.kern)
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.q)
self.add_parameters(self.q, self.Z, self.kern)
def log_likelihood(self):
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
def parameters_changed(self):
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.q)
self.X.gradient = psimu
self.X_variance.gradient = psiS
#psimu, psiS = numpy.ones(self.N * self.input_dim), numpy.ones(self.N * self.input_dim)
try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.q)
except AttributeError: psiZ = numpy.zeros_like(self.Z)
self.Z.gradient = psiZ
#psiZ = numpy.ones(self.num_inducing * self.input_dim)
@ -176,6 +178,6 @@ if __name__ == "__main__":
+GPy.kern.White(input_dim)
)
)
m2.ensure_default_constraints()
#m2.ensure_default_constraints()
else:
unittest.main()

View file

@ -34,7 +34,7 @@ class GradientTests(unittest.TestCase):
model_fit = getattr(GPy.models, model_type)
# noise = GPy.kern.White(dimension)
kern = kern # + noise
kern = kern # + noise
if uncertain_inputs:
m = model_fit(X, Y, kernel=kern, X_variance=np.random.rand(X.shape[0], X.shape[1]))
else:
@ -60,13 +60,14 @@ class GradientTests(unittest.TestCase):
def test_GPRegression_mlp_1d(self):
''' Testing the GP regression with mlp kernel with white kernel on 1d data '''
mlp = GPy.kern.mlp(1)
mlp = GPy.kern.MLP(1)
self.check_model(mlp, model_type='GPRegression', dimension=1)
def test_GPRegression_poly_1d(self):
''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
mlp = GPy.kern.Poly(1, degree=5)
self.check_model(mlp, model_type='GPRegression', dimension=1)
#TODO:
#def test_GPRegression_poly_1d(self):
# ''' Testing the GP regression with polynomial kernel with white kernel on 1d data '''
# mlp = GPy.kern.Poly(1, degree=5)
# self.check_model(mlp, model_type='GPRegression', dimension=1)
def test_GPRegression_matern52_1D(self):
''' Testing the GP regression with matern52 kernel on 1d data '''
@ -163,14 +164,14 @@ class GradientTests(unittest.TestCase):
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)
#@unittest.expectedFailure
# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
#@unittest.expectedFailure
# @unittest.expectedFailure
def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1)
@ -202,7 +203,7 @@ class GradientTests(unittest.TestCase):
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
kernel = GPy.kern.RBF(1)
m = GPy.models.GPClassification(X,Y,kernel=kernel)
m = GPy.models.GPClassification(X, Y, kernel=kernel)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@ -212,11 +213,11 @@ class GradientTests(unittest.TestCase):
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
Z = np.linspace(0, 15, 4)[:, None]
kernel = GPy.kern.RBF(1)
m = GPy.models.SparseGPClassification(X,Y,kernel=kernel,Z=Z)
#distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
#likelihood = GPy.likelihoods.EP(Y, distribution)
#m = GPy.core.SparseGP(X, likelihood, kernel, Z)
#m.ensure_default_constraints()
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z)
# distribution = GPy.likelihoods.likelihood_functions.Bernoulli()
# likelihood = GPy.likelihoods.EP(Y, distribution)
# m = GPy.core.SparseGP(X, likelihood, kernel, Z)
# m.ensure_default_constraints()
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@ -224,8 +225,8 @@ class GradientTests(unittest.TestCase):
N = 20
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]
k = GPy.kern.RBF(1) + GPy.kern.White(1)
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
m = GPy.models.FITCClassification(X, Y, kernel = k)
Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None]
m = GPy.models.FITCClassification(X, Y, kernel=k)
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@ -238,7 +239,7 @@ class GradientTests(unittest.TestCase):
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.RBF(1)
m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
m = GPy.models.GPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad())
@ -251,7 +252,7 @@ class GradientTests(unittest.TestCase):
Y = np.vstack((Y1, Y2))
k1 = GPy.kern.RBF(1)
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
m = GPy.models.SparseGPMultioutputRegression(X_list=[X1, X2], Y_list=[Y1, Y2], kernel_list=[k1])
m.constrain_fixed('.*rbf_var', 1.)
self.assertTrue(m.checkgrad())

View file

@ -14,6 +14,7 @@ import subarray_and_sorting
import caching
import diag
import initialization
import multioutput
try:
import sympy

View file

@ -9,24 +9,27 @@ class Cacher(object):
"""
def __init__(self, operation, limit=5, ignore_args=()):
def __init__(self, operation, limit=5, ignore_args=(), force_kwargs=()):
self.limit = int(limit)
self.ignore_args = ignore_args
self.force_kwargs = force_kwargs
self.operation=operation
self.cached_inputs = []
self.cached_outputs = []
self.inputs_changed = []
def __call__(self, *args):
def __call__(self, *args, **kw):
"""
A wrapper function for self.operation,
"""
#ensure that specified arguments are ignored
items = sorted(kw.items(), key=lambda x: x[0])
oa_all = args + tuple(a for _,a in items)
if len(self.ignore_args) != 0:
oa = [a for i,a in enumerate(args) if i not in self.ignore_args]
oa = [a for i,a in itertools.chain(enumerate(args), items) if i not in self.ignore_args and i not in self.force_kwargs]
else:
oa = args
oa = oa_all
# this makes sure we only add an observer once, and that None can be in args
observable_args = []
@ -37,36 +40,45 @@ class Cacher(object):
#make sure that all the found argument really are observable:
#otherswise don't cache anything, pass args straight though
if not all([isinstance(arg, Observable) for arg in observable_args]):
return self.operation(*args)
return self.operation(*args, **kw)
if len(self.force_kwargs) != 0:
# check if there are force args, which force reloading
for k in self.force_kwargs:
if k in kw and kw[k] is not None:
return self.operation(*args, **kw)
# TODO: WARNING !!! Cache OFFSWITCH !!! WARNING
# return self.operation(*args)
#if the result is cached, return the cached computation
state = [all(a is b for a, b in itertools.izip_longest(args, cached_i)) for cached_i in self.cached_inputs]
if any(state):
i = state.index(True)
if self.inputs_changed[i]:
#(elements of) the args have changed since we last computed: update
self.cached_outputs[i] = self.operation(*args)
self.inputs_changed[i] = False
return self.cached_outputs[i]
else:
#first time we've seen these arguments: compute
try:
if any(state):
i = state.index(True)
if self.inputs_changed[i]:
#(elements of) the args have changed since we last computed: update
self.cached_outputs[i] = self.operation(*args, **kw)
self.inputs_changed[i] = False
return self.cached_outputs[i]
else:
#first time we've seen these arguments: compute
#first make sure the depth limit isn't exceeded
if len(self.cached_inputs) == self.limit:
args_ = self.cached_inputs.pop(0)
[a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
self.inputs_changed.pop(0)
self.cached_outputs.pop(0)
#compute
self.cached_inputs.append(args)
self.cached_outputs.append(self.operation(*args))
self.inputs_changed.append(False)
[a.add_observer(self, self.on_cache_changed) for a in observable_args]
return self.cached_outputs[-1]#Max says return.
#first make sure the depth limit isn't exceeded
if len(self.cached_inputs) == self.limit:
args_ = self.cached_inputs.pop(0)
[a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
self.inputs_changed.pop(0)
self.cached_outputs.pop(0)
#compute
self.cached_inputs.append(oa_all)
self.cached_outputs.append(self.operation(*args, **kw))
self.inputs_changed.append(False)
[a.add_observer(self, self.on_cache_changed) for a in observable_args]
return self.cached_outputs[-1]#return
except:
raise
finally:
self.reset()
def on_cache_changed(self, arg):
"""
@ -76,7 +88,7 @@ class Cacher(object):
"""
self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]
def reset(self, obj):
def reset(self):
"""
Totally reset the cache
"""
@ -90,15 +102,16 @@ class Cache_this(object):
"""
A decorator which can be applied to bound methods in order to cache them
"""
def __init__(self, limit=5, ignore_args=()):
def __init__(self, limit=5, ignore_args=(), force_kwargs=()):
self.limit = limit
self.ignore_args = ignore_args
self.force_args = force_kwargs
self.c = None
def __call__(self, f):
def f_wrap(*args):
def f_wrap(*args, **kw):
if self.c is None:
self.c = Cacher(f, self.limit, ignore_args=self.ignore_args)
return self.c(*args)
self.c = Cacher(f, self.limit, ignore_args=self.ignore_args, force_kwargs=self.force_args)
return self.c(*args, **kw)
f_wrap._cacher = self
f_wrap.__doc__ = "**cached**\n\n" + (f.__doc__ or "")
f_wrap.__doc__ = "**cached**" + (f.__doc__ or "")
return f_wrap

View file

@ -1,12 +1,17 @@
import numpy as np
import warnings
from .. import kern
import GPy
def build_XY(input_list,output_list=None,index=None):
def get_slices(input_list):
num_outputs = len(input_list)
_s = [0] + [ _x.shape[0] for _x in input_list ]
_s = np.cumsum(_s)
slices = [slice(a,b) for a,b in zip(_s[:-1],_s[1:])]
return slices
def build_XY(input_list,output_list=None,index=None):
num_outputs = len(input_list)
if output_list is not None:
assert num_outputs == len(output_list)
Y = np.vstack(output_list)
@ -15,42 +20,82 @@ def build_XY(input_list,output_list=None,index=None):
if index is not None:
assert len(index) == num_outputs
I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,index)] )
I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,index)] )
else:
I = np.vstack( [j*np.ones((_x.shape[0],1)) for _x,j in zip(input_list,range(num_outputs))] )
I = np.hstack( [np.repeat(j,_x.shape[0]) for _x,j in zip(input_list,range(num_outputs))] )
X = np.vstack(input_list)
X = np.hstack([X,I])
return X,Y,slices
X = np.hstack([X,I[:,None]])
def build_lcm(input_dim, num_outputs, CK = [], NC = [], W_columns=1,W=None,kappa=None):
#TODO build_icm or build_lcm
return X,Y,I[:,None]#slices
def build_likelihood(Y_list,noise_index,likelihoods_list=None):
Ny = len(Y_list)
if likelihoods_list is None:
likelihoods_list = [GPy.likelihoods.Gaussian(name="Gaussian_noise_%s" %j) for y,j in zip(Y_list,range(Ny))]
else:
assert len(likelihoods_list) == Ny
likelihood = GPy.likelihoods.mixed_noise.MixedNoise(likelihoods_list=likelihoods_list, noise_index=noise_index)
return likelihood
def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='X'):
"""
Builds a kernel for a linear coregionalization model
Builds a kernel for an Intrinsic Coregionalization Model
:input_dim: Input dimensionality
:num_outputs: Number of outputs
:param CK: List of coregionalized kernels (i.e., this will be multiplied by a coregionalize kernel).
:param K: List of kernels that will be added up together with CK, but won't be multiplied by a coregionalize kernel
:param W_columns: number tuples of the corregionalization parameters 'coregion_W'
:type W_columns: integer
:param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
:type kernel: a GPy kernel
:param W_rank: number tuples of the corregionalization parameters 'W'
:type W_rank: integer
"""
if kernel.input_dim <> input_dim:
kernel.input_dim = input_dim
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
for k in CK:
if k.input_dim <> input_dim:
k.input_dim = input_dim
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
K = kernel.prod(GPy.kern.Coregionalize([input_dim], num_outputs,W_rank,W,kappa,name='B'),name=name)
#K = kernel ** GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa, name= 'B')
K['.*variance'] = 1.
K['.*variance'].fix()
return K
for k in NC:
if k.input_dim <> input_dim + 1:
k.input_dim = input_dim + 1
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
kernel = CK[0].prod(kern.Coregionalize(num_outputs,W_columns,W,kappa),tensor=True)
for k in CK[1:]:
k_coreg = kern.Coregionalize(num_outputs,W_columns,W,kappa)
kernel += k.prod(k_coreg,tensor=True)
for k in NC:
kernel += k
def LCM(input_dim, num_outputs, kernels_list, W_rank=1,name='X'):
"""
Builds a kernel for an Linear Coregionalization Model
return kernel
:input_dim: Input dimensionality
:num_outputs: Number of outputs
:param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
:type kernel: a GPy kernel
:param W_rank: number tuples of the corregionalization parameters 'W'
:type W_rank: integer
"""
Nk = len(kernels_list)
K = ICM(input_dim,num_outputs,kernels_list[0],W_rank,name='%s%s' %(name,0))
j = 1
for kernel in kernels_list[1:]:
K += ICM(input_dim,num_outputs,kernel,W_rank,name='%s%s' %(name,j))
return K
def Private(input_dim, num_outputs, kernel, output, kappa=None,name='X'):
"""
Builds a kernel for an Intrinsic Coregionalization Model
:input_dim: Input dimensionality
:num_outputs: Number of outputs
:param kernel: kernel that will be multiplied by the coregionalize kernel (matrix B).
:type kernel: a GPy kernel
:param W_rank: number tuples of the corregionalization parameters 'W'
:type W_rank: integer
"""
K = ICM(input_dim,num_outputs,kernel,W_rank=1,kappa=kappa,name=name)
K.B.W.fix(0)
_range = range(num_outputs)
_range.pop(output)
for j in _range:
K.B.kappa[j] = 0
K.B.kappa[j].fix()
return K