merge changes

This commit is contained in:
Zhenwen Dai 2014-03-21 22:10:24 +00:00
commit 8a83845937
65 changed files with 1197 additions and 600 deletions

View file

@ -7,7 +7,7 @@ import warnings
from .. import kern
from ..util.linalg import dtrtrs
from model import Model
from parameterization import ObservableArray
from parameterization import ObsAr
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
@ -31,21 +31,19 @@ class GP(Model):
super(GP, self).__init__(name)
assert X.ndim == 2
if isinstance(X, (ObservableArray, VariationalPosterior)):
if isinstance(X, (ObsAr, VariationalPosterior)):
self.X = X
else: self.X = ObservableArray(X)
else: self.X = ObsAr(X)
self.num_data, self.input_dim = self.X.shape
assert Y.ndim == 2
self.Y = ObservableArray(Y)
self.Y = ObsAr(Y)
assert Y.shape[0] == self.num_data
_, self.output_dim = self.Y.shape
if Y_metadata is None:
self.Y_metadata = {}
else:
self.Y_metadata = Y_metadata
#TODO: check the type of this is okay?
self.Y_metadata = Y_metadata
assert isinstance(kernel, kern.Kern)
#assert self.input_dim == kernel.input_dim
@ -76,25 +74,27 @@ class GP(Model):
def _raw_predict(self, _Xnew, full_cov=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
For making predictions, does not account for normalization or likelihood
full_cov is a boolean which defines whether the full covariance matrix
of the prediction is computed. If full_cov is False (default), only the
diagonal of the covariance is returned.
$$
p(f*|X*, X, Y) = \int_{-\infty}^{\infty} p(f*|f, X*) p(f|X, Y) df
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{x*x}(K_{xx} + \Sigma)^{-1}K_{xx*})
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
Kx = self.kern.K(_Xnew, self.X).T
#LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
WiKx = np.dot(self.posterior.woodbury_inv, Kx)
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(_Xnew)
#var = Kxx - tdot(LiKx.T)
var = np.dot(Kx.T, WiKx)
var = Kxx - np.dot(Kx.T, WiKx)
else:
Kxx = self.kern.Kdiag(_Xnew)
#var = Kxx - np.sum(LiKx*LiKx, 0)
var = Kxx - np.sum(WiKx*Kx, 0)
var = var.reshape(-1, 1)
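A minimal NumPy sketch of the Woodbury-based prediction in _raw_predict above, for the Gaussian case; `k` is a hypothetical kernel function standing in for self.kern.K:

import numpy as np

def raw_predict_sketch(k, X, Xnew, Y, noise_var):
    # woodbury_inv = (K_xx + Sigma)^-1, woodbury_vector = (K_xx + Sigma)^-1 Y
    woodbury_inv = np.linalg.inv(k(X, X) + noise_var * np.eye(X.shape[0]))
    woodbury_vector = np.dot(woodbury_inv, Y)
    Kx = k(X, Xnew)                                                # K_{x x*}
    mu = np.dot(Kx.T, woodbury_vector)                             # predictive mean
    var = k(Xnew, Xnew) - np.dot(Kx.T, np.dot(woodbury_inv, Kx))   # full covariance
    return mu, var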

View file

@ -10,11 +10,11 @@ class Mapping(Parameterized):
Base model for shared behavior between models that can act like a mapping.
"""
def __init__(self, input_dim, output_dim):
def __init__(self, input_dim, output_dim, name='mapping'):
self.input_dim = input_dim
self.output_dim = output_dim
super(Mapping, self).__init__()
super(Mapping, self).__init__(name=name)
# Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at
# the end

View file

@ -1,5 +1,5 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from param import Param, ObservableArray
from param import Param, ObsAr
from parameterized import Parameterized

View file

@ -1,25 +1,25 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
__updated__ = '2013-12-16'
__updated__ = '2014-03-17'
import numpy as np
from parameter_core import Observable
class ObservableArray(np.ndarray, Observable):
class ObsAr(np.ndarray, Observable):
"""
An ndarray which reports changes to its observers.
The observers can add themselves with a callable, which
will be called every time this array changes. The callable
takes exactly one argument, which is this array itself.
"""
__array_priority__ = -1 # Never give back ObservableArray
__array_priority__ = -1 # Never give back ObsAr
def __new__(cls, input_array, *a, **kw):
if not isinstance(input_array, ObservableArray):
if not isinstance(input_array, ObsAr):
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
else: obj = input_array
cls.__name__ = "ObservableArray\n "
super(ObservableArray, obj).__init__(*a, **kw)
#cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
super(ObsAr, obj).__init__(*a, **kw)
return obj
def __array_finalize__(self, obj):
@ -30,6 +30,14 @@ class ObservableArray(np.ndarray, Observable):
def __array_wrap__(self, out_arr, context=None):
return out_arr.view(np.ndarray)
def __reduce__(self):
func, args, state = np.ndarray.__reduce__(self)
return func, args, (state, Observable._getstate(self))
def __setstate__(self, state):
np.ndarray.__setstate__(self, state[0])
Observable._setstate(self, state[1])
def _s_not_empty(self, s):
# this checks whether there is something picked by this slice.
return True
@ -46,7 +54,7 @@ class ObservableArray(np.ndarray, Observable):
def __setitem__(self, s, val):
if self._s_not_empty(s):
super(ObservableArray, self).__setitem__(s, val)
super(ObsAr, self).__setitem__(s, val)
self.notify_observers(self[s])
def __getslice__(self, start, stop):
@ -56,7 +64,7 @@ class ObservableArray(np.ndarray, Observable):
return self.__setitem__(slice(start, stop), val)
def __copy__(self, *args):
return ObservableArray(self.view(np.ndarray).copy())
return ObsAr(self.view(np.ndarray).copy())
def copy(self, *args):
return self.__copy__(*args)
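A usage sketch for ObsAr above; the import path and the (observer, callable, priority) signature of add_observer are inferred from their use elsewhere in this commit and should be treated as assumptions:

import numpy as np
from GPy.core.parameterization import ObsAr

a = ObsAr(np.zeros(3))
a.add_observer(None, lambda arr: None, 0)  # callable receives the array itself
a[0] = 1.0                                 # __setitem__ notifies observers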

View file

@ -4,7 +4,7 @@
import itertools
import numpy
from parameter_core import OptimizationHandlable, adjust_name_for_printing
from array_core import ObservableArray
from array_core import ObsAr
###### printing
__constraints_name__ = "Constraint"
@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
__print_threshold__ = 5
######
class Param(OptimizationHandlable, ObservableArray):
class Param(OptimizationHandlable, ObsAr):
"""
Parameter object for GPy models.
@ -226,7 +226,7 @@ class Param(OptimizationHandlable, ObservableArray):
# Constrainable
#===========================================================================
def _ensure_fixes(self):
self._fixes_ = numpy.ones(self._realsize_, dtype=bool)
if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)
#===========================================================================
# Convenience
@ -269,6 +269,8 @@ class Param(OptimizationHandlable, ObservableArray):
@property
def _ties_str(self):
return ['']
def _ties_for(self, ravi):
return [['N/A']]*ravi.size
def __repr__(self, *args, **kwargs):
name = "\033[1m{x:s}\033[0;0m:\n".format(
x=self.hierarchy_name())
@ -312,7 +314,7 @@ class Param(OptimizationHandlable, ObservableArray):
ravi = self._raveled_index(filter_)
if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
if prirs is None: prirs = self.priors.properties_for(ravi)
if ties is None: ties = [['N/A']]*self.size
if ties is None: ties = self._ties_for(ravi)
ties = [' '.join(map(lambda x: x, t)) for t in ties]
if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__)
if lx is None: lx = self._max_len_values()

View file

@ -16,7 +16,7 @@ Observable Pattern for parameterization
from transformations import Transformation, Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np
__updated__ = '2014-03-14'
__updated__ = '2014-03-18'
class HierarchyError(Exception):
"""
@ -56,7 +56,7 @@ class InterfacePickleFunctions(object):
"""
raise NotImplementedError, "To be able to use pickling you need to implement this method"
class Pickleable(object):
class Pickleable(InterfacePickleFunctions):
"""
Make an object pickleable (See python doc 'pickling').
@ -95,7 +95,7 @@ class Pickleable(object):
def _has_get_set_state(self):
return '_getstate' in vars(self.__class__) and '_setstate' in vars(self.__class__)
class Observable(InterfacePickleFunctions):
class Observable(Pickleable):
"""
Observable pattern for parameterization.
@ -155,6 +155,7 @@ class Observable(InterfacePickleFunctions):
def _getstate(self):
return [self._observer_callables_]
def _setstate(self, state):
self._observer_callables_ = state.pop()
@ -376,7 +377,7 @@ class Constrainable(Nameable, Indexable):
# Ensure that the fixes array is set:
# Parameterized: ones(self.size)
# Param: ones(self._realsize_)
self._fixes_ = np.ones(self.size, dtype=bool)
if not self._has_fixes(): self._fixes_ = np.ones(self.size, dtype=bool)
def _set_fixed(self, index):
self._ensure_fixes()
@ -397,7 +398,7 @@ class Constrainable(Nameable, Indexable):
self._fixes_ = None
def _has_fixes(self):
return hasattr(self, "_fixes_") and self._fixes_ is not None
return hasattr(self, "_fixes_") and self._fixes_ is not None and self._fixes_.size == self.size
#===========================================================================
# Prior Operations
@ -540,12 +541,12 @@ class Constrainable(Nameable, Indexable):
print "WARNING: reconstraining parameters {}".format(self.parameter_names() or self.name)
which.add(what, self._raveled_index())
def _remove_from_index_operations(self, which, what):
def _remove_from_index_operations(self, which, transforms):
"""
Helper to avoid duplicated code.
Remove the given transforms (transform, prior, etc.) from the given parameter index operations.
"""
if len(what) == 0:
if len(transforms) == 0:
transforms = which.properties()
removed = np.empty((0,), dtype=int)
for t in transforms:
@ -566,24 +567,32 @@ class OptimizationHandlable(Constrainable):
super(OptimizationHandlable, self).__init__(name, default_constraint=default_constraint, *a, **kw)
def transform(self):
[np.put(self._param_array_, ind, c.finv(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
[np.put(self._param_array_, ind, c.finv(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def untransform(self):
[np.put(self._param_array_, ind, c.f(self._param_array_[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
[np.put(self._param_array_, ind, c.f(self._param_array_.flat[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
def _get_params_transformed(self):
# transformed parameters (apply transformation rules)
p = self._param_array_.copy()
[np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
if self._has_fixes():
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
return p[fixes]
elif self._has_fixes():
return p[self._fixes_]
return p
def _set_params_transformed(self, p):
if p is self._param_array_:
p = p.copy()
if self._has_fixes(): self._param_array_[self._fixes_] = p
else: self._param_array_[:] = p
if self.has_parent() and self.constraints[__fixed__].size != 0:
fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED
self._param_array_.flat[fixes] = p
elif self._has_fixes(): self._param_array_.flat[self._fixes_] = p
else: self._param_array_.flat = p
self.untransform()
self._trigger_params_changed()
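A self-contained sketch of the fixes masking used by _get_params_transformed / _set_params_transformed above; note FIXED is falsy (the mask starts as ones and fixed indices are set to FIXED), so fixed entries are dropped from the optimizer's view:

import numpy as np

params = np.array([1.0, 2.0, 3.0])
fixes = np.array([True, False, True])  # False at index 1 == FIXED
optimizer_view = params[fixes]         # array([1., 3.]): free parameters only
params[fixes] = optimizer_view + 10.   # write back only the free entries
assert np.allclose(params, [11.0, 2.0, 13.0])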
@ -661,8 +670,8 @@ class OptimizationHandlable(Constrainable):
for pi in self._parameters_:
pislice = slice(pi_old_size, pi_old_size+pi.size)
self._param_array_[pislice] = pi._param_array_.ravel()#, requirements=['C', 'W']).flat
self._gradient_array_[pislice] = pi._gradient_array_.ravel()#, requirements=['C', 'W']).flat
self._param_array_[pislice] = pi._param_array_.flat#, requirements=['C', 'W']).flat
self._gradient_array_[pislice] = pi._gradient_array_.flat#, requirements=['C', 'W']).flat
pi._param_array_.data = parray[pislice].data
pi._gradient_array_.data = garray[pislice].data
@ -769,11 +778,11 @@ class Parameterizable(OptimizationHandlable):
Add all parameters to this param class, you can insert parameters
at any given index using the :func:`list.insert` syntax
"""
# if param.has_parent():
# raise AttributeError, "parameter {} already in another model, create new object (or copy) for adding".format(param._short())
if param in self._parameters_ and index is not None:
self.remove_parameter(param)
self.add_parameter(param, index)
#elif param.has_parent():
# raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
elif param not in self._parameters_:
if param.has_parent():
parent = param._parent_
@ -797,13 +806,19 @@ class Parameterizable(OptimizationHandlable):
param.add_observer(self, self._pass_through_notify_observers, -np.inf)
self.size += param.size
parent = self
while parent is not None:
parent.size += param.size
parent = parent._parent_
self._connect_parameters()
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
self._connect_parameters(ignore_added_names=_ignore_added_names)
self._notify_parent_change()
self._connect_fixes()
else:
raise RuntimeError, """Parameter exists already added and no copy made"""
raise HierarchyError, """Parameter exists already and no copy made"""
def add_parameters(self, *parameters):
@ -829,17 +844,18 @@ class Parameterizable(OptimizationHandlable):
param.remove_observer(self, self._pass_through_notify_observers)
self.constraints.shift_left(start, param.size)
self._connect_fixes()
self._connect_parameters()
self._notify_parent_change()
parent = self._parent_
while parent is not None:
parent._connect_fixes()
parent._connect_parameters()
parent._notify_parent_change()
parent.size -= param.size
parent = parent._parent_
self._highest_parent_._connect_parameters()
self._highest_parent_._connect_fixes()
self._highest_parent_._notify_parent_change()
def _connect_parameters(self, ignore_added_names=False):
# connect parameterlist to this parameterized object
# This just sets up the right connection for the params objects
@ -862,8 +878,8 @@ class Parameterizable(OptimizationHandlable):
# first connect all children
p._propagate_param_grad(self._param_array_[pslice], self._gradient_array_[pslice])
# then connect children to self
self._param_array_[pslice] = p._param_array_.ravel()#, requirements=['C', 'W']).ravel(order='C')
self._gradient_array_[pslice] = p._gradient_array_.ravel()#, requirements=['C', 'W']).ravel(order='C')
self._param_array_[pslice] = p._param_array_.flat#, requirements=['C', 'W']).ravel(order='C')
self._gradient_array_[pslice] = p._gradient_array_.flat#, requirements=['C', 'W']).ravel(order='C')
if not p._param_array_.flags['C_CONTIGUOUS']:
import ipdb;ipdb.set_trace()

View file

@ -7,10 +7,10 @@ from domains import _POSITIVE,_NEGATIVE, _BOUNDED
import weakref
import sys
#_lim_val = -np.log(sys.float_info.epsilon)
_exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)
_lim_val = 36.0
epsilon = np.finfo(np.float64).resolution
#===============================================================================
# Fixing constants
@ -54,19 +54,19 @@ class Transformation(object):
class Logexp(Transformation):
domain = _POSITIVE
def f(self, x):
return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val))))
return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val)))) + epsilon
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f):
return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.))
def gradfactor(self, f):
return np.where(f>_lim_val, 1., 1 - np.exp(-f))
return np.where(f>_lim_val, 1., 1. - np.exp(-f))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '+ve'
class LogexpNeg(Transformation):
domain = _POSITIVE
@ -98,7 +98,7 @@ class NegativeLogexp(Transformation):
return -self.logexp.initialize(f) # np.abs(f)
def __str__(self):
return '-ve'
class LogexpClipped(Logexp):
max_bound = 1e100
min_bound = 1e-10
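A quick standalone check of the Logexp transform above: f maps an unconstrained value to a strictly positive one, and finv inverts it up to the added epsilon (constants copied from this file):

import numpy as np

_lim_val = 36.0
epsilon = np.finfo(np.float64).resolution

def f(x):
    return np.where(x > _lim_val, x,
                    np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val)))) + epsilon

def finv(y):
    return np.where(y > _lim_val, y, np.log(np.exp(y + 1e-20) - 1.))

x = np.array([-5., 0., 5.])
assert np.allclose(finv(f(x)), x, atol=1e-6)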

View file

@ -64,8 +64,8 @@ class SparseGP(GP):
self.kern.gradient += target
#gradients wrt Z
self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_Z_expectations(
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else:
#gradients wrt kernel
@ -76,8 +76,8 @@ class SparseGP(GP):
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern.gradient += target
#gradients wrt Z
self.Z.gradient[:,self.kern.active_dims] = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient[:,self.kern.active_dims] += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, full_cov=False):
"""
@ -88,8 +88,9 @@ class SparseGP(GP):
mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov:
Kxx = self.kern.K(Xnew)
#var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
var = Kxx - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
#var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
var = var.squeeze()
else:
Kxx = self.kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T

View file

@ -6,4 +6,4 @@ import regression
import dimensionality_reduction
import tutorials
import stochastic
import non_gaussian
import non_gaussian

View file

@ -23,13 +23,10 @@ K = Bias.prod(Coreg,name='X')
#K.coregion.W = 0
#print K.coregion.W
#print Bias.K(_X,_X)
#print K.K(X,X)
#pb.matshow(K.K(X,X))
Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")]
kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=2,kernels_list=Mlist,name='H')
kern.B.W = 0
@ -37,16 +34,22 @@ kern.B.kappa = 1.
#kern.B.W.fix()
#kern.B.kappa.fix()
#m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern)
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], kernel=kern)
Z1 = np.array([1.5,2.5])[:,None]
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], Z_list = [Z1], kernel=kern)
#m.optimize()
m.checkgrad(verbose=1)
"""
fig = pb.figure()
ax0 = fig.add_subplot(211)
ax1 = fig.add_subplot(212)
slices = GPy.util.multioutput.get_slices([Y1,Y2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0)
#m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1)
"""

View file

@ -160,6 +160,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy
from matplotlib import pyplot as plt
from ..util.misc import param_to_array
_np.random.seed(0)
data = GPy.util.datasets.oil()
@ -173,11 +174,11 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05)
if plot:
y = m.Y[0, :]
y = m.Y
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes)
data_show = GPy.plotting.matplot_dep.visualize.vector_show(y)
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean), # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
raw_input('Press enter to finish')
plt.close(fig)

View file

@ -158,7 +158,7 @@ def boston_example(optimize=True, plot=True):
#Gaussian GP
print "Gauss GP"
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
mgp.constrain_fixed('white', 1e-5)
mgp.constrain_fixed('.*white', 1e-5)
mgp['rbf_len'] = rbf_len
mgp['noise'] = noise
print mgp
@ -176,7 +176,7 @@ def boston_example(optimize=True, plot=True):
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
mg.constrain_positive('noise_variance')
mg.constrain_fixed('white', 1e-5)
mg.constrain_fixed('.*white', 1e-5)
mg['rbf_len'] = rbf_len
mg['noise'] = noise
print mg
@ -194,10 +194,10 @@ def boston_example(optimize=True, plot=True):
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.constrain_fixed('white', 1e-5)
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
mstu_t.constrain_fixed('.*white', 1e-5)
mstu_t.constrain_bounded('.*t_noise', 0.0001, 1000)
mstu_t['rbf_len'] = rbf_len
mstu_t['t_noise'] = noise
mstu_t['.*t_noise'] = noise
print mstu_t
if optimize:
mstu_t.optimize(optimizer=optimizer, messages=messages)

View file

@ -25,80 +25,51 @@ def olympic_marathon_men(optimize=True, plot=True):
return m
def coregionalization_toy2(optimize=True, plot=True):
def coregionalization_toy(optimize=True, plot=True):
"""
A simple demonstration of coregionalization on two sinusoidal functions.
"""
#build a design matrix with a column of integers indicating the output
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
X = np.hstack((np.vstack((X1, X2)), index))
#build a suitable set of observed variables
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
Y = np.vstack((Y1, Y2))
#build the kernel
k1 = GPy.kern.RBF(1) + GPy.kern.Bias(1)
k2 = GPy.kern.Coregionalize(2,1)
k = k1**k2
m = GPy.models.GPRegression(X, Y, kernel=k)
m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2])
if optimize:
m.optimize('bfgs', max_iters=100)
if plot:
m.plot(fixed_inputs=[(1,0)])
m.plot(fixed_inputs=[(1,1)], ax=pb.gca())
slices = GPy.util.multioutput.get_slices([X1,X2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0})
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca())
return m
#FIXME: Needs recovering once likelihoods are consolidated
#def coregionalization_toy(optimize=True, plot=True):
# """
# A simple demonstration of coregionalization on two sinusoidal functions.
# """
# X1 = np.random.rand(50, 1) * 8
# X2 = np.random.rand(30, 1) * 5
# X = np.vstack((X1, X2))
# Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
# Y = np.vstack((Y1, Y2))
#
# k1 = GPy.kern.RBF(1)
# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
# m.constrain_fixed('.*rbf_var', 1.)
# m.optimize(max_iters=100)
#
# fig, axes = pb.subplots(2,1)
# m.plot(fixed_inputs=[(1,0)],ax=axes[0])
# m.plot(fixed_inputs=[(1,1)],ax=axes[1])
# axes[0].set_title('Output 0')
# axes[1].set_title('Output 1')
# return m
def coregionalization_sparse(optimize=True, plot=True):
"""
A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations.
"""
#fetch the data from the non sparse examples
m = coregionalization_toy2(optimize=False, plot=False)
X, Y = m.X, m.Y
#build a design matrix with a column of integers indicating the output
X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5
k = GPy.kern.RBF(1)**GPy.kern.Coregionalize(2)
#build a suitable set of observed variables
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
#construct a model
m = GPy.models.SparseGPRegression(X,Y, num_inducing=25, kernel=k)
m.Z[:,1].fix() # don't optimize the inducing input indexes
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2])
if optimize:
m.optimize('bfgs', max_iters=100, messages=1)
m.optimize('bfgs', max_iters=100)
if plot:
m.plot(fixed_inputs=[(1,0)])
m.plot(fixed_inputs=[(1,1)], ax=pb.gca())
slices = GPy.util.multioutput.get_slices([X1,X2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0})
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca())
pb.ylim(-3,)
return m

View file

@ -19,19 +19,15 @@ class DTC(object):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this!
from ...util.misc import param_to_array
Y = param_to_array(Y)
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance)
if beta.size <1:
beta = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z)
@ -91,19 +87,15 @@ class vDTC(object):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y):
def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
#TODO: MAX! fix this!
from ...util.misc import param_to_array
Y = param_to_array(Y)
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
beta = 1./np.squeeze(likelihood.variance)
if beta.size <1:
beta = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z)
@ -112,7 +104,7 @@ class vDTC(object):
U = Knm
Uy = np.dot(U.T,Y)
#factor Kmm
#factor Kmm
Kmmi, L, Li, _ = pdinv(Kmm)
# Compute A

View file

@ -3,6 +3,7 @@
from posterior import Posterior
from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag
import numpy as np
log_2_pi = np.log(2*np.pi)
@ -41,7 +42,9 @@ class ExactGaussianInference(object):
K = kern.K(X)
Wi, LW, LWi, W_logdet = pdinv(K + likelihood.covariance_matrix(Y, Y_metadata))
Ky = K.copy()
diag.add(Ky, likelihood.gaussian_variance(Y_metadata))
Wi, LW, LWi, W_logdet = pdinv(Ky)
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
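The new Ky construction relies on diag.add; assuming it adds a scalar (or per-element vector) to the diagonal in place, a plain NumPy equivalent with hypothetical values is:

import numpy as np

K = np.eye(3)
Ky = K.copy()
Ky.flat[::Ky.shape[0] + 1] += 0.1   # same effect as diag.add(Ky, 0.1)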

View file

@ -11,9 +11,9 @@ class EP(object):
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param eta: Power EP thing TODO: Ricardo: what, exactly?
:param eta: parameter for fractional EP updates.
:type eta: float64
:param delta: Power EP thing TODO: Ricardo: what, exactly?
:param delta: damping factor for EP updates.
:type delta: float64
"""
self.epsilon, self.eta, self.delta = epsilon, eta, delta
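Concretely, the damping described above corresponds to a generic damped fixed-point step (an illustration, not GPy's internal EP code):

def damped_update(old, new, delta):
    # delta = 1 is an undamped update; smaller delta moves more cautiously
    return delta * new + (1. - delta) * old

assert damped_update(0.0, 1.0, 0.5) == 0.5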

View file

@ -17,14 +17,14 @@ class FITC(object):
"""
const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y):
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
sigma_n = np.squeeze(likelihood.variance)
if sigma_n.size <1:
sigma_n = likelihood.gaussian_variance(Y_metadata)
if sigma_n.size >1:
raise NotImplementedError, "no hetero noise with this implementation of FITC"
Kmm = kern.K(Z)

View file

@ -51,12 +51,11 @@ class Laplace(object):
Ki_f_init = self._previous_Ki_fhat
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat
self.Ki_fhat = Ki_fhat
self.K = K.copy()
#Compute hessian and other variables at mode
log_marginal, woodbury_vector, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
@ -86,13 +85,13 @@ class Laplace(object):
#define the objective function (to be maximised)
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, extra_data=Y_metadata)
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + likelihood.logpdf(f, Y, Y_metadata=Y_metadata)
difference = np.inf
iteration = 0
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
W = -likelihood.d2logpdf_df2(f, Y, extra_data=Y_metadata)
grad = likelihood.dlogpdf_df(f, Y, extra_data=Y_metadata)
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
W_f = W*f
@ -136,13 +135,12 @@ class Laplace(object):
At the mode, compute the hessian and effective covariance matrix.
returns: logZ : approximation to the marginal likelihood
woodbury_vector : variable required for calculating the approximation to the covariance matrix
woodbury_inv : variable required for calculating the approximation to the covariance matrix
dL_dthetaL : array of derivatives (1 x num_kernel_params)
dL_dthetaL : array of derivatives (1 x num_likelihood_params)
"""
#At this point get the hessian matrix (or vector as W is diagonal)
W = -likelihood.d2logpdf_df2(f_hat, Y, extra_data=Y_metadata)
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
@ -151,11 +149,10 @@ class Laplace(object):
Ki_W_i = K - C.T.dot(C) #Could this be wrong?
#compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, extra_data=Y_metadata) - np.sum(np.log(np.diag(L)))
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata) - np.sum(np.log(np.diag(L)))
#Compute matrices needed for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, extra_data=Y_metadata) # -d3lik_d3fhat
woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, extra_data=Y_metadata)
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) #why isn't this -0.5? s2 in R&W p126 line 9.
#BiK, _ = dpotrs(L, K, lower=1)
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
@ -169,7 +166,7 @@ class Laplace(object):
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
#Implicit
implicit_part = np.dot(woodbury_vector, dL_dfhat.T).dot(I_KW_i)
implicit_part = np.dot(Ki_f, dL_dfhat.T).dot(I_KW_i)
dL_dK = explicit_part + implicit_part
else:
@ -179,7 +176,7 @@ class Laplace(object):
#compute dL_dthetaL#
####################
if likelihood.size > 0 and not likelihood.is_fixed:
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, extra_data=Y_metadata)
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, Y_metadata=Y_metadata)
num_params = likelihood.size
# make space for one derivative for each likelihood parameter
@ -200,7 +197,7 @@ class Laplace(object):
else:
dL_dthetaL = np.zeros(likelihood.size)
return log_marginal, woodbury_vector, K_Wi_i, dL_dK, dL_dthetaL
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave):
"""

View file

@ -73,20 +73,37 @@ class Posterior(object):
@property
def mean(self):
"""
Posterior mean
$$
K_{xx}v
v := \texttt{Woodbury vector}
$$
"""
if self._mean is None:
self._mean = np.dot(self._K, self.woodbury_vector)
return self._mean
@property
def covariance(self):
"""
Posterior covariance
$$
K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
W_{xx}^{-1} := \texttt{Woodbury inv}
$$
"""
if self._covariance is None:
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
self._covariance = self._K - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T
self._covariance = self._K - (np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
#self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
return self._covariance.squeeze()
return self._covariance
@property
def precision(self):
"""
Inverse of posterior covariance
"""
if self._precision is None:
cov = np.atleast_3d(self.covariance)
self._precision = np.zeros(cov.shape) # if one covariance per dimension
@ -96,8 +113,15 @@ class Posterior(object):
@property
def woodbury_chol(self):
"""
return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
$$
L_{W}L_{W}^{\top} = W^{-1}
W^{-1} := \texttt{Woodbury inv}
$$
"""
if self._woodbury_chol is None:
#compute woodbury chol from
#compute woodbury chol from
if self._woodbury_inv is not None:
winv = np.atleast_3d(self._woodbury_inv)
self._woodbury_chol = np.zeros(winv.shape)
@ -121,6 +145,13 @@ class Posterior(object):
@property
def woodbury_inv(self):
"""
The inverse of the Woodbury matrix; in the Gaussian likelihood case it is defined as
$$
(K_{xx} + \Sigma_{xx})^{-1}
\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_inv is None:
self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)
#self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
@ -129,17 +160,22 @@ class Posterior(object):
@property
def woodbury_vector(self):
"""
The Woodbury vector, which in the Gaussian likelihood case is defined as
$$
(K_{xx} + \Sigma)^{-1}Y
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_vector is None:
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
return self._woodbury_vector
@property
def K_chol(self):
"""
Cholesky of the prior covariance K
"""
if self._K_chol is None:
self._K_chol = jitchol(self._K)
return self._K_chol
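A numeric sanity check of the Woodbury definitions documented above for the Gaussian case (plain NumPy with hypothetical values, not the Posterior class itself):

import numpy as np

K = np.array([[1.0, 0.5], [0.5, 1.0]])
Y = np.array([[0.2], [-0.1]])
Sigma = 0.1 * np.eye(2)

woodbury_inv = np.linalg.inv(K + Sigma)       # (K_xx + Sigma)^-1
woodbury_vector = np.dot(woodbury_inv, Y)     # (K_xx + Sigma)^-1 Y
mean = np.dot(K, woodbury_vector)             # K_xx v
cov = K - np.dot(K, np.dot(woodbury_inv, K))  # K_xx - K_xx W^-1 K_xx
assert cov.shape == (2, 2) and np.allclose(cov, cov.T)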

View file

@ -176,7 +176,6 @@ class VarDTC(object):
#construct a posterior object
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict
class VarDTCMissingData(object):
@ -365,7 +364,7 @@ class VarDTCMissingData(object):
return post, log_marginal, grad_dict
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones([num_data, 1])).flatten()
dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
if het_noise:

View file

@ -0,0 +1,63 @@
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
####### Preliminary BO with standard acquisition functions ###############################
# Types of BO
# MM: Maximum (or minimum) mean
# MPI: Maximum posterior improvement
# MUI: Maximum upper interval
def BOacquisition(X,Y,model,type_bo="MPI",type_objective="max",par_mpi = 0,z_mui=1.96,plot=True,n_eval = 500):
# Only works in dimension 1
# Grid where the GP will be evaluated
X_star = np.linspace(min(X)-10,max(X)+10,n_eval)
X_star = X_star[:,None]
# Posterior GP evaluated on the grid
fest = model.predict(X_star)
# Calculate the acquisition function
## IF Maximize
if type_objective == "max":
if type_bo == "MPI": # add others here
acqu = norm.cdf((fest[0]-(1+par_mpi)*max(fest[0])) / fest[1])
acqu = acqu/(2*max(acqu))
if type_bo == "MM":
acqu = fest[0]/max(fest[0])
acqu = acqu/(2*max(acqu))
if type_bo == "MUI":
acqu = fest[0]+z_mui*np.sqrt(fest[1])
acqu = acqu/(2*max(acqu))
optimal_loc = np.argmax(acqu)
x_new = X_star[optimal_loc]
## IF Minimize
if type_objective == "min":
if type_bo == "MPI": # add others here
acqu = 1-norm.cdf((fest[0]-(1+par_mpi)*min(fest[0])) / fest[1])
acqu = acqu/(2*max(acqu))
if type_bo == "MM":
acqu = 1-fest[0]/max(fest[0])
acqu = acqu/(2*max(acqu))
if type_bo == "MUI":
acqu = -fest[0]+z_mui*np.sqrt(fest[1])
acqu = acqu/(2*max(acqu))
optimal_loc = np.argmax(acqu)
x_new = X_star[optimal_loc]
# Plot GP posterior, collected data and the acquisition function
if plot:
plt.plot(X,Y , 'p')
plt.title('Acquisition function')
model.plot()
plt.plot(X_star, acqu, 'r--')
# Return the point where we should take the new sample
return x_new
###############################################################
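Hypothetical usage of BOacquisition above on a toy 1D problem; assumes GPy is importable and that model.predict returns (mean, variance, ...) as indexed above:

import numpy as np
import GPy

X = np.random.rand(20, 1) * 10
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
model = GPy.models.GPRegression(X, Y)
x_next = BOacquisition(X, Y, model, type_bo="MUI", plot=False)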

View file

@ -9,4 +9,6 @@ from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs, Hierarchical
from _src.coregionalize import Coregionalize
from _src.ssrbf import SSRBF
from _src.ssrbf import SSRBF # TODO: ZD: did you remove this?
from _src.ODE_UY import ODE_UY

282
GPy/kern/_src/ODE_UY.py Normal file
View file

@ -0,0 +1,282 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_UY(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'):
assert input_dim ==2, "only defined for 2 input dims"
super(ODE_UY, self).__init__(input_dim, active_dims, name)
self.variance_Y = Param('variance_Y', variance_Y, Logexp())
self.variance_U = Param('variance_U', variance_U, Logexp())
self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp())
self.lengthscale_U = Param('lengthscale_U', lengthscale_U, Logexp())
self.add_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U)
def K(self, X, X2=None):
# model : a * dy/dt + b * y = U
#lu=sqrt(3)/theta1, ly=1/theta2, theta2=a/b (theta_y), sigma2=1/(2ab) (sigma_y)
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
#iu=self.input_lengthU #dimension of U
Vu=self.variance_U
Vy=self.variance_Y
#Vy=ly/2
#stop
# kernel for kuu matern3/2
kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# kernel for kyy
k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
#kyu3 = lambda dist: 0
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
#k1cros = lambda dist:0
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
#k2cros = lambda dist:0
Vyu=np.sqrt(Vy*ly*2)
# cross covariance kuy
kuyp = lambda dist:Vu*Vyu*(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:Vu*Vyu*(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:Vu*Vyu*(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:Vu*Vyu*(kyu3(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
elif i==0 and j==1:
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) )
elif i==1 and j==1:
K[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
else:
#K[ss1,ss2]= 0
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) )
return K
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
Kdiag = np.zeros(X.shape[0])
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu = self.variance_U
Vy=self.variance_Y
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= self.variance_U
elif i==1:
Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu=self.variance_U
Vy=self.variance_Y
Vyu = np.sqrt(Vy*ly*2)
dVdly = 0.5/np.sqrt(ly)*np.sqrt(2*Vy)
dVdVy = 0.5/np.sqrt(Vy)*np.sqrt(2*ly)
rd=rdist.shape
dktheta1 = np.zeros(rd)
dktheta2 = np.zeros(rd)
dkUdvar = np.zeros(rd)
dkYdvar = np.zeros(rd)
# dk dtheta for UU
UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist)
UUdtheta2 = lambda dist: 0
#UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist)
UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# dk dtheta for YY
dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
dk2theta1 = lambda dist: (1.0)*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
#dktheta1 = lambda dist: self.variance_U*self.variance_Y*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 =lambda dist: 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
#dktheta2 = lambda dist: self.variance_U*self.variance_Y*(dk1theta2 + dk2theta2 +dk3theta2)
# kyy kernel
k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
#dkdvar = k1+k2+k3
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
# cross covariance kuy
kuyp = lambda dist:(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:(kyu3(-dist)) #t<0 kyu
# dk dtheta for UY
dkyu3dtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) )
dkyu3dtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) -\
(lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )
dkcros2dtheta1 = lambda dist: np.exp(ly*dist)* ( -(ly+lu)**(-2) + (ly+lu)**(-2) + (-2)*lu*(lu+ly)**(-3) )
dkcros2dtheta2 = lambda dist: np.exp(ly*dist)*dist* ( (ly+lu)**(-1) + lu*(lu+ly)**(-2) ) + \
np.exp(ly*dist)*( -(lu+ly)**(-2) + lu*(-2)*(lu+ly)**(-3) )
dkcros1dtheta1 = lambda dist: np.exp(ly*dist)*( -(lu-ly)**(-2)*( 1-np.exp((lu-ly)*dist) + lu*dist*np.exp((lu-ly)*dist)+ \
lu*(1-np.exp((lu-ly)*dist))/(lu-ly) ) + (lu-ly)**(-1)*( -np.exp( (lu-ly)*dist )*dist + dist*np.exp( (lu-ly)*dist)+\
lu*dist**2*np.exp((lu-ly)*dist)+(1-np.exp((lu-ly)*dist))/(lu-ly) - lu*np.exp((lu-ly)*dist)*dist/(lu-ly) -\
lu*(1-np.exp((lu-ly)*dist))/(lu-ly)**2 ) )
dkcros1dtheta2 = lambda t: np.exp(ly*t)*t/(lu-ly)*( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)+\
lu*(1-np.exp((lu-ly)*t))/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)**2* ( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t) + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)*( np.exp((lu-ly)*t)*t -lu*t*t*np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)/(lu-ly)+\
lu*( 1-np.exp((lu-ly)*t) )/(lu-ly)**2 )
dkuypdtheta1 = lambda dist:(dkyu3dtheta1(dist)) #t>0 kuy
dkuyndtheta1 = lambda dist:(dkcros1dtheta1(dist)+dkcros2dtheta1(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta1 = lambda dist:(dkcros1dtheta1(-dist)+dkcros2dtheta1(-dist)) #t>0 kyu
dkyundtheta1 = lambda dist:(dkyu3dtheta1(-dist)) #t<0 kyu
dkuypdtheta2 = lambda dist:(dkyu3dtheta2(dist)) #t>0 kuy
dkuyndtheta2 = lambda dist:(dkcros1dtheta2(dist)+dkcros2dtheta2(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta2 = lambda dist:(dkcros1dtheta2(-dist)+dkcros2dtheta2(-dist)) #t>0 kyu
dkyundtheta2 = lambda dist:(dkyu3dtheta2(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
#target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = Vu*UUdtheta1(np.abs(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = 0
dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
dkYdvar[ss1,ss2] = 0
elif i==0 and j==1:
########target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kuyp(rdist[ss1,ss2]), Vyu* kuyn(rdist[ss1,ss2]) )
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyp(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyn(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kuyp(rdist[ss1,ss2]), Vu*dVdVy* kuyn(rdist[ss1,ss2]) )
elif i==1 and j==1:
#target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
dktheta2[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2])))
dkUdvar[ss1,ss2] = self.variance_Y*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
dkYdvar[ss1,ss2] = self.variance_U*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
else:
#######target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kyup(rdist[ss1,ss2]),Vyu*kyun(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta2(rdist[ss1,ss2])+Vu*dVdly*kyup(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta2(rdist[ss1,ss2])+Vu*dVdly*kyun(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kyup(rdist[ss1,ss2]), Vu*dVdVy*kyun(rdist[ss1,ss2]))
#stop
self.variance_U.gradient = np.sum(dkUdvar * dL_dK) # Vu
self.variance_Y.gradient = np.sum(dkYdvar * dL_dK) # Vy
self.lengthscale_U.gradient = np.sum(dktheta1*(-np.sqrt(3)*self.lengthscale_U**(-2))* dL_dK) #lu
self.lengthscale_Y.gradient = np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK) #ly
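A hypothetical smoke test for the new ODE_UY kernel above: input is 2-D, with the last column indexing output 0 (U) or 1 (Y), as K() expects; the GPy.kern import is added by this commit:

import numpy as np
from GPy.kern import ODE_UY

t = np.linspace(0., 5., 6)[:, None]
X = np.vstack([np.hstack([t, np.zeros_like(t)]),   # U rows (index 0)
               np.hstack([t, np.ones_like(t)])])   # Y rows (index 1)
k = ODE_UY(2)
K = k.K(X)
assert K.shape == (12, 12)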

View file

@ -23,7 +23,7 @@ class Add(CombinationKernel):
If a list of parts (of this kernel!) `which_parts` is given, only
the parts of the list are taken to compute the covariance.
"""
assert X.shape[1] == self.input_dim
assert X.shape[1] > max(np.r_[self.active_dims])
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
@ -33,7 +33,7 @@ class Add(CombinationKernel):
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
assert X.shape[1] > max(np.r_[self.active_dims])
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
@ -58,7 +58,12 @@ class Add(CombinationKernel):
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape)
[target.__setitem__([Ellipsis, p.active_dims], target[:, p.active_dims]+p.gradients_X(dL_dK, X, X2)) for p in self.parts]
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
def psi0(self, Z, variational_posterior):
@ -131,7 +136,7 @@ class Add(CombinationKernel):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target[:, p1.active_dims] += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
@ -151,8 +156,8 @@ class Add(CombinationKernel):
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu[:, p1.active_dims] += a
target_S[:, p1.active_dims] += b
target_mu += a
target_S += b
return target_mu, target_S
def _getstate(self):
@ -165,4 +170,11 @@ class Add(CombinationKernel):
def _setstate(self, state):
super(Add, self)._setstate(state)
def add(self, other, name='sum'):
if isinstance(other, Add):
other_params = other._parameters_[:]
for p in other_params:
other.remove_parameter(p)
self.add_parameters(*other_params)
else: self.add_parameter(other)
return self

View file

@ -8,7 +8,7 @@ import itertools
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start and stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
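A standalone check of the behaviour described above (the doctest output is truncated by the hunk), assuming the function returns the nested list of slices its docstring promises:

import numpy as np
from GPy.kern._src.independent_outputs import index_to_slices

index = np.asarray([0,0,0,1,1,1,2,2,2])
assert index_to_slices(index) == [[slice(0, 3)], [slice(3, 6)], [slice(6, 9)]]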
@ -39,73 +39,102 @@ class IndependentOutputs(CombinationKernel):
The index of the functions is given by the last column in the input X
the rest of the columns of X are passed to the underlying kernel for computation (in blocks).
Kern is wrapped with a slicer metaclass
:param kernels: either a single kernel, or a list of kernels to work with. If a list of kernels
is given, the values in the index_dim column index into that list.
"""
def __init__(self, kern, index_dim=-1, name='independ'):
def __init__(self, kernels, index_dim=-1, name='independ'):
assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the indices"
super(IndependentOutputs, self).__init__(kernels=[kern], extra_dims=[index_dim], name=name)
if not isinstance(kernels, list):
self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim
self.kern = kern
#self.add_parameters(self.kern)
self.kerns = kernels if len(kernels) != 1 else itertools.repeat(kernels[0])
def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim])
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[[np.copyto(target[s,ss], self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(self.kerns, slices)]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
[[[np.copyto(target[s, s2], self.kern.K(X[s,:],X2[s2,:])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
return target
def Kdiag(self,X):
slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape[0])
[[np.copyto(target[s], self.kern.Kdiag(X[s])) for s in slices_i] for slices_i in slices]
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
return target
def update_gradients_full(self,dL_dK,X,X2=None):
target = np.zeros(self.kern.size)
def collate_grads(dL, X, X2):
self.kern.update_gradients_full(dL,X,X2)
target[:] += self.kern.gradient
slices = index_to_slices(X[:,self.index_dim])
if self.single_kern: target = np.zeros(self.kern.size)
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
def collate_grads(kern, i, dL, X, X2):
kern.update_gradients_full(dL,X,X2)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
if X2 is None:
[[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(self.kerns,slices))]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
self.kern.gradient = target
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(self.kerns,slices,slices2))]
if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros(X.shape)
slices = index_to_slices(X[:,self.index_dim])
if X2 is None:
[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,ss],X[s],X[ss])) for s, ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
# TODO: make use of index_to_slices
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(self.kerns, slices)]
#slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
#import ipdb;ipdb.set_trace()
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(self.kerns, slices)]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
for kern, s, s2 in zip(self.kerns, slices, slices2)]
# TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim])
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(self.kerns, slices,slices2)]
return target
def gradients_X_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(X.shape)
[[np.copyto(target[s,self.kern.active_dims], self.kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for slices_i in slices]
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(self.kerns, slices)]
return target
def update_gradients_diag(self, dL_dKdiag, X):
target = np.zeros(self.kern.size)
def collate_grads(dL, X):
self.kern.update_gradients_diag(dL,X)
target[:] += self.kern.gradient
slices = index_to_slices(X[:,self.index_dim])
[[collate_grads(dL_dKdiag[s], X[s,:]) for s in slices_i] for slices_i in slices]
self.kern.gradient = target
if self.single_kern: target = np.zeros(self.kern.size)
else: target = [np.zeros(kern.size) for kern, _ in zip(self.kerns, slices)]
def collate_grads(kern, i, dL, X):
kern.update_gradients_diag(dL,X)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(self.kerns, slices))]
if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(self.kerns, slices))]
class Hierarchical(Kern):
class Hierarchical(CombinationKernel):
"""
A kernel which can represent a simple hierarchical model.
@ -116,7 +145,7 @@ class Hierarchical(Kern):
The index of the functions is given by additional columns in the input X.
"""
def __init__(self, kerns, name='hierarchy'):
def __init__(self, kerns, name='hierarchy'):
assert all([k.input_dim==kerns[0].input_dim for k in kerns])
super(Hierarchical, self).__init__(kerns[0].input_dim + len(kerns) - 1, name)
self.kerns = kerns

View file

@ -140,12 +140,7 @@ class Kern(Parameterized):
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add
kernels = []
if isinstance(self, Add): kernels.extend(self._parameters_)
else: kernels.append(self)
if isinstance(other, Add): kernels.extend(other._parameters_)
else: kernels.append(other)
return Add(kernels, name=name)
return Add([self, other], name=name)
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""

View file

@ -4,6 +4,7 @@ Created on 11 Mar 2014
@author: maxz
'''
from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np
class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __call__(self, *args, **kw):
@ -12,18 +13,18 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
instance.Kdiag = _slice_wrapper(instance, instance.Kdiag, diag=True)
instance.update_gradients_full = _slice_wrapper(instance, instance.update_gradients_full, diag=False, derivative=True)
instance.update_gradients_diag = _slice_wrapper(instance, instance.update_gradients_diag, diag=True, derivative=True)
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True)
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True)
instance.gradients_X = _slice_wrapper(instance, instance.gradients_X, diag=False, derivative=True, ret_X=True)
instance.gradients_X_diag = _slice_wrapper(instance, instance.gradients_X_diag, diag=True, derivative=True, ret_X=True)
instance.psi0 = _slice_wrapper(instance, instance.psi0, diag=False, derivative=False)
instance.psi1 = _slice_wrapper(instance, instance.psi1, diag=False, derivative=False)
instance.psi2 = _slice_wrapper(instance, instance.psi2, diag=False, derivative=False)
instance.update_gradients_expectations = _slice_wrapper(instance, instance.update_gradients_expectations, derivative=True, psi_stat=True)
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True)
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True)
instance.gradients_Z_expectations = _slice_wrapper(instance, instance.gradients_Z_expectations, derivative=True, psi_stat_Z=True, ret_X=True)
instance.gradients_qX_expectations = _slice_wrapper(instance, instance.gradients_qX_expectations, derivative=True, psi_stat=True, ret_X=True)
instance.parameters_changed()
return instance
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False):
def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False, psi_stat_Z=False, ret_X=False):
"""
This method wraps the functions in the kernel to make sure every kernel always sees only its respective input dimensions.
The different switches are:
@ -34,11 +35,16 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
"""
if derivative:
if diag:
def x_slice_wrapper(dL_dK, X):
def x_slice_wrapper(dL_dKdiag, X):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret = np.zeros(X.shape)
X = kern._slice_X(X) if not kern._sliced_X else X
# if the return value is of shape X.shape, we need to make sure to return the right shape
kern._sliced_X += 1
try:
ret = operation(dL_dK, X)
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dKdiag, X)
else: ret = operation(dL_dKdiag, X)
except:
raise
finally:
@ -46,10 +52,22 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret
elif psi_stat:
def x_slice_wrapper(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret1, ret2 = np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1
# if the return value is of shape X.shape, we need to make sure to return the right shape
try:
ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
if ret_X_not_sliced:
ret = list(operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior))
r2 = ret[:2]
ret[0] = ret1
ret[1] = ret2
ret[0][:, kern.active_dims] = r2[0]
ret[1][:, kern.active_dims] = r2[1]
del r2
else: ret = operation(dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except:
raise
finally:
@ -57,10 +75,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret
elif psi_stat_Z:
def x_slice_wrapper(dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced: ret = np.zeros(Z.shape)
Z, variational_posterior = kern._slice_X(Z) if not kern._sliced_X else Z, kern._slice_X(variational_posterior) if not kern._sliced_X else variational_posterior
kern._sliced_X += 1
try:
ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
if ret_X_not_sliced:
ret[:, kern.active_dims] = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
else: ret = operation(dL_dpsi1, dL_dpsi2, Z, variational_posterior)
except:
raise
finally:
@ -68,10 +90,14 @@ def _slice_wrapper(kern, operation, diag=False, derivative=False, psi_stat=False
return ret
else:
def x_slice_wrapper(dL_dK, X, X2=None):
ret_X_not_sliced = ret_X and kern._sliced_X == 0
if ret_X_not_sliced:
ret = np.zeros(X.shape)
X, X2 = kern._slice_X(X) if not kern._sliced_X else X, kern._slice_X(X2) if X2 is not None and not kern._sliced_X else X2
kern._sliced_X += 1
try:
ret = operation(dL_dK, X, X2)
if ret_X_not_sliced: ret[:, kern.active_dims] = operation(dL_dK, X, X2)
else: ret = operation(dL_dK, X, X2)
except:
raise
finally:
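What the ret_X branches above amount to, in isolation (sketch; active_dims is assumed to hold the input columns the kernel acts on):

import numpy as np

def slice_X(X, active_dims):
    # the kernel only ever sees its own columns
    return X[:, active_dims]

def embed_gradient(grad_sliced, X_shape, active_dims):
    # scatter a gradient computed on the sliced input back to the full input
    # shape, leaving zeros in the columns the kernel does not act on
    ret = np.zeros(X_shape)
    ret[:, active_dims] = grad_sliced
    return ret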

View file

@ -312,5 +312,4 @@ class Linear(Kern):
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!
def input_sensitivity(self):
if self.ARD: return self.variances
else: return self.variances.repeat(self.input_dim)
return np.ones(self.input_dim) * self.variances

View file

@ -51,15 +51,15 @@ class Prod(CombinationKernel):
def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2):
target[:,k1.active_dims] += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2)
target[:,k2.active_dims] += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2)
target += k1.gradients_X(dL_dK*k2.K(X, X2), X, X2)
target += k2.gradients_X(dL_dK*k1.K(X, X2), X, X2)
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
for k1,k2 in itertools.combinations(self.parts, 2):
target[:,k1.active_dims] += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
target[:,k2.active_dims] += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
return target

View file

@ -152,12 +152,7 @@ class Stationary(Kern):
This term appears in derivatives.
"""
dist = self._scaled_dist(X, X2).copy()
if X2 is None:
nondiag = util.diag.offdiag_view(dist)
nondiag[:] = 1./nondiag
return dist
else:
return 1./np.where(dist != 0., dist, np.inf)
return 1./np.where(dist != 0., dist, np.inf)
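Illustration of the np.where guard above: zero distances (the diagonal when X2 is None) map to 1/inf == 0 instead of raising a division warning:

import numpy as np
dist = np.array([[0., 2.], [2., 0.]])
inv = 1. / np.where(dist != 0., dist, np.inf)  # -> [[0., 0.5], [0.5, 0.]]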
def gradients_X(self, dL_dK, X, X2=None):
"""

View file

@ -95,7 +95,7 @@ class Bernoulli(Likelihood):
else:
return np.nan
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -106,7 +106,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:param Y_metadata: Y_metadata not used in bernoulli
:returns: likelihood evaluated for this point
:rtype: float
@ -118,7 +118,7 @@ class Bernoulli(Likelihood):
objective = np.where(y, link_f, 1.-link_f)
return np.exp(np.sum(np.log(objective)))
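A quick numeric check of the np.where construction above (illustration only): for y = [1, 0] and link_f = [0.9, 0.2] the selected factors are [0.9, 0.8], giving a likelihood of 0.72:

import numpy as np
y, link_f = np.array([1, 0]), np.array([0.9, 0.2])
objective = np.where(y, link_f, 1. - link_f)    # -> [0.9, 0.8]
likelihood = np.exp(np.sum(np.log(objective)))  # -> 0.72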
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood function given link(f)
@ -129,7 +129,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:param Y_metadata: Y_metadata not used in bernoulli
:returns: log likelihood evaluated at points link(f)
:rtype: float
"""
@ -140,7 +140,7 @@ class Bernoulli(Likelihood):
np.seterr(**state)
return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log pdf at y, given link(f) w.r.t link(f)
@ -151,7 +151,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:param Y_metadata: Y_metadata not used in bernoulli
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
@ -162,7 +162,7 @@ class Bernoulli(Likelihood):
np.seterr(**state)
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link_f, w.r.t link_f; the hessian will be 0 unless i == j,
i.e. the second derivative of logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -175,7 +175,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:param Y_metadata: Y_metadata not used in bernoulli
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
@ -190,7 +190,7 @@ class Bernoulli(Likelihood):
np.seterr(**state)
return d2logpdf_dlink2
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -201,7 +201,7 @@ class Bernoulli(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in bernoulli
:param Y_metadata: Y_metadata not used in bernoulli
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""

View file

@ -18,13 +18,12 @@ class Exponential(Likelihood):
L(x) = \lambda \exp(-\lambda Y_i)
$$
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False):
super(Exponential, self).__init__(gp_link,analytical_mean,analytical_variance)
def __init__(self,gp_link=None):
if gp_link is None:
gp_link = link_functions.Log()
super(Exponential, self).__init__(gp_link, 'ExpLikelihood')
def _preprocess_values(self,Y):
return Y
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -35,16 +34,15 @@ class Exponential(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
objective = link_f*np.exp(-y*link_f)
return np.exp(np.sum(np.log(objective)))
#return np.exp(np.sum(-y/link_f - np.log(link_f) ))
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
@ -55,17 +53,16 @@ class Exponential(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = np.log(link_f) - y*link_f
#logpdf_link = np.sum(-np.log(link_f) - y/link_f)
return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -76,7 +73,7 @@ class Exponential(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
@ -86,7 +83,7 @@ class Exponential(Likelihood):
#grad = y/(link_f**2) - 1./link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -99,7 +96,7 @@ class Exponential(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
@ -112,7 +109,7 @@ class Exponential(Likelihood):
#hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -123,7 +120,7 @@ class Exponential(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in exponential distribution
:param Y_metadata: Y_metadata which is not used in exponential distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
@ -132,18 +129,6 @@ class Exponential(Likelihood):
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3
def _mean(self,gp):
"""
Expected value of y given f
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Variance of y given f
"""
return self.gp_link.transf(gp)**2
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.

View file

@ -1,11 +1,12 @@
# Copyright (c) 2012, 2013 Ricardo Andrade
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import stats,special
import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
from ..core.parameterization import Param
import link_functions
from likelihood import Likelihood
@ -18,14 +19,16 @@ class Gamma(Likelihood):
\\alpha_{i} = \\beta \\lambda(f_{i})
"""
def __init__(self,gp_link=None,analytical_mean=False,analytical_variance=False,beta=1.):
self.beta = beta
super(Gamma, self).__init__(gp_link,analytical_mean,analytical_variance)
def __init__(self,gp_link=None,beta=1.):
if gp_link is None:
gp_link = link_functions.Log()
super(Gamma, self).__init__(gp_link, 'Gamma')
def _preprocess_values(self,Y):
return Y
self.beta = Param('beta', beta)
self.add_parameter(self.beta)
self.beta.fix()#TODO: gradients!
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -37,7 +40,7 @@ class Gamma(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
@ -47,7 +50,7 @@ class Gamma(Likelihood):
objective = (y**(alpha - 1.) * np.exp(-self.beta*y) * self.beta**alpha)/ special.gamma(alpha)
return np.exp(np.sum(np.log(objective)))
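Sanity check of the parameterisation above (illustration; the shape is alpha = beta * link_f and the rate is beta, so the density matches scipy's stats.gamma.pdf(y, a=alpha, scale=1./beta)):

import numpy as np
from scipy import special
beta, link_f, y = 1.5, np.array([2.0]), np.array([1.0])
alpha = beta * link_f
pdf = (y**(alpha - 1.) * np.exp(-beta*y) * beta**alpha) / special.gamma(alpha)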
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
@ -59,7 +62,7 @@ class Gamma(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: likelihood evaluated for this point
:rtype: float
@ -71,7 +74,7 @@ class Gamma(Likelihood):
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
return np.sum(log_objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -83,7 +86,7 @@ class Gamma(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
@ -94,7 +97,7 @@ class Gamma(Likelihood):
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -108,7 +111,7 @@ class Gamma(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
@ -122,7 +125,7 @@ class Gamma(Likelihood):
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -134,22 +137,10 @@ class Gamma(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in gamma distribution
:param Y_metadata: Y_metadata which is not used in gamma distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
return d3lik_dlink3
def _mean(self,gp):
"""
Expected value of y given f
"""
return self.gp_link.transf(gp)
def _variance(self,gp):
"""
Variance of y given f
"""
return self.gp_link.transf(gp)/self.beta

View file

@ -35,12 +35,7 @@ class Gaussian(Likelihood):
if gp_link is None:
gp_link = link_functions.Identity()
if isinstance(gp_link, link_functions.Identity):
analytical_variance = True
analytical_mean = True
else:
analytical_variance = False
analytical_mean = False
assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
super(Gaussian, self).__init__(gp_link, name=name)
@ -51,14 +46,12 @@ class Gaussian(Likelihood):
self.log_concave = True
def betaY(self,Y,Y_metadata=None):
#TODO: ~Ricardo this does not live here
return Y/self.gaussian_variance(Y_metadata)
def gaussian_variance(self, Y_metadata=None):
return self.variance
def covariance_matrix(self, Y, Y_metadata=None):
return np.eye(Y.shape[0]) * self.variance
def update_gradients(self, grad):
self.variance.gradient = grad
@ -99,10 +92,10 @@ class Gaussian(Likelihood):
def predictive_variance(self, mu, sigma, predictive_mean=None):
return self.variance + sigma**2
def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
return [stats.norm.ppf(q/100.)*np.sqrt(var) + mu for q in quantiles]
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
return [stats.norm.ppf(q/100.)*np.sqrt(var + self.variance) + mu for q in quantiles]
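The change above is the substance of the fix: quantiles of the observed y* come from the latent variance plus the likelihood's noise variance, not from the latent variance alone. A sketch with hypothetical numbers:

import numpy as np
from scipy import stats
mu, var, noise = 0.0, 0.5, 0.1   # latent mean/variance and noise variance
q95 = stats.norm.ppf(.95) * np.sqrt(var + noise) + mu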
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -113,14 +106,14 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:param Y_metadata: Y_metadata not used in gaussian
:returns: likelihood evaluated for this point
:rtype: float
"""
#Assumes no covariance, exp, sum, log for numerical stability
return np.exp(np.sum(np.log(stats.norm.pdf(y, link_f, np.sqrt(self.variance)))))
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log likelihood function given link(f)
@ -131,7 +124,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:param Y_metadata: Y_metadata not used in gaussian
:returns: log likelihood evaluated for this point
:rtype: float
"""
@ -141,7 +134,7 @@ class Gaussian(Likelihood):
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log pdf at y, given link(f) w.r.t link(f)
@ -152,7 +145,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:param Y_metadata: Y_metadata not used in gaussian
:returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
@ -161,7 +154,7 @@ class Gaussian(Likelihood):
grad = s2_i*y - s2_i*link_f
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link_f, w.r.t link_f.
i.e. second derivative logpdf at y given link(f_i) link(f_j) w.r.t link(f_i) and link(f_j)
@ -175,7 +168,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:param Y_metadata: Y_metadata not used in gaussian
:returns: Diagonal of log hessian matrix (second derivative of log likelihood evaluated at points link(f))
:rtype: Nx1 array
@ -188,7 +181,7 @@ class Gaussian(Likelihood):
hess = -(1.0/self.variance)*np.ones((N, 1))
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -199,7 +192,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:param Y_metadata: Y_metadata not used in gaussian
:returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array
"""
@ -208,7 +201,7 @@ class Gaussian(Likelihood):
d3logpdf_dlink3 = np.zeros((N,1))
return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
"""
Gradient of the log-likelihood function at y given link(f), w.r.t variance parameter (noise_variance)
@ -219,7 +212,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float
"""
@ -230,7 +223,7 @@ class Gaussian(Likelihood):
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
return np.sum(dlik_dsigma) # Sure about this sum?
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (noise_variance)
@ -241,7 +234,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array
"""
@ -250,7 +243,7 @@ class Gaussian(Likelihood):
dlik_grad_dsigma = -s_4*y + s_4*link_f
return dlik_grad_dsigma
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (noise_variance)
@ -261,7 +254,7 @@ class Gaussian(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data not used in gaussian
:param Y_metadata: Y_metadata not used in gaussian
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array
"""
@ -271,16 +264,16 @@ class Gaussian(Likelihood):
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return np.asarray([[dlogpdf_dvar]])
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dvar
def _mean(self, gp):

View file

@ -153,6 +153,10 @@ class Likelihood(Parameterized):
return mean
def _conditional_mean(self, f):
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
raise NotImplementedError, "implement this function to make predictions"
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
"""
Numerical approximation to the predictive variance: V(Y_star)
@ -204,31 +208,31 @@ class Likelihood(Parameterized):
# V(Y_star) = E[ V(Y_star|f_star) ] + E[ E(Y_star|f_star)**2 ] - E[ E(Y_star|f_star) ]**2
return exp_var + var_exp
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
raise NotImplementedError
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
raise NotImplementedError
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
raise NotImplementedError
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
raise NotImplementedError
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
raise NotImplementedError
def dlogpdf_link_dtheta(self, link_f, y, extra_data=None):
def dlogpdf_link_dtheta(self, link_f, y, Y_metadata=None):
raise NotImplementedError
def dlogpdf_dlink_dtheta(self, link_f, y, extra_data=None):
def dlogpdf_dlink_dtheta(self, link_f, y, Y_metadata=None):
raise NotImplementedError
def d2logpdf_dlink2_dtheta(self, link_f, y, extra_data=None):
def d2logpdf_dlink2_dtheta(self, link_f, y, Y_metadata=None):
raise NotImplementedError
def pdf(self, f, y, extra_data=None):
def pdf(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the likelihood (pdf) using it
@ -239,14 +243,14 @@ class Likelihood(Parameterized):
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
return self.pdf_link(link_f, y, extra_data=extra_data)
return self.pdf_link(link_f, y, Y_metadata=Y_metadata)
def logpdf(self, f, y, extra_data=None):
def logpdf(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
@ -257,14 +261,14 @@ class Likelihood(Parameterized):
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: log likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
return self.logpdf_link(link_f, y, extra_data=extra_data)
return self.logpdf_link(link_f, y, Y_metadata=Y_metadata)
def dlogpdf_df(self, f, y, extra_data=None):
def dlogpdf_df(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
@ -276,16 +280,16 @@ class Likelihood(Parameterized):
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array
"""
link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df)
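For reference, what chain_1, chain_2 and chain_3 are assumed to compute here (the first three composite derivatives of log p(y|g(f)) per Faa di Bruno; standard results, not read off this diff):

# chain_1: dlogpdf_dlink * dlink_df
# chain_2: d2logpdf_dlink2 * dlink_df**2 + dlogpdf_dlink * d2link_df2
# chain_3: d3logpdf_dlink3 * dlink_df**3
#          + 3 * d2logpdf_dlink2 * dlink_df * d2link_df2
#          + dlogpdf_dlink * d3link_df3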
def d2logpdf_df2(self, f, y, extra_data=None):
def d2logpdf_df2(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
@ -297,18 +301,18 @@ class Likelihood(Parameterized):
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array
"""
link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
def d3logpdf_df3(self, f, y, extra_data=None):
def d3logpdf_df3(self, f, y, Y_metadata=None):
"""
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
Uses the Faa di Bruno's formula for the chain rule
@ -320,44 +324,44 @@ class Likelihood(Parameterized):
:type f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution - not used
:param Y_metadata: Y_metadata which is not used in student t distribution - not used
:returns: third derivative of log likelihood evaluated for this point
:rtype: float
"""
link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, extra_data=extra_data)
d3logpdf_dlink3 = self.d3logpdf_dlink3(link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, extra_data=extra_data)
d2logpdf_dlink2 = self.d2logpdf_dlink2(link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, extra_data=extra_data)
dlogpdf_dlink = self.dlogpdf_dlink(link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
def dlogpdf_dtheta(self, f, y, extra_data=None):
def dlogpdf_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
if self.size > 0:
link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(link_f, y, extra_data=extra_data)
return self.dlogpdf_link_dtheta(link_f, y, Y_metadata=Y_metadata)
else:
#Is no parameters so return an empty array for its derivatives
return np.zeros([1, 0])
def dlogpdf_df_dtheta(self, f, y, extra_data=None):
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
if self.size > 0:
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else:
#Is no parameters so return an empty array for its derivatives
return np.zeros([f.shape[0], 0])
def d2logpdf_df2_dtheta(self, f, y, extra_data=None):
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
"""
TODO: Doc strings
"""
@ -365,17 +369,17 @@ class Likelihood(Parameterized):
link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, extra_data=extra_data)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, extra_data=extra_data)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(link_f, y, Y_metadata=Y_metadata)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else:
#Is no parameters so return an empty array for its derivatives
return np.zeros([f.shape[0], 0])
def _laplace_gradients(self, f, y, extra_data=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, extra_data=extra_data)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, extra_data=extra_data)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, extra_data=extra_data)
def _laplace_gradients(self, f, y, Y_metadata=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
dlogpdf_df_dtheta = self.dlogpdf_df_dtheta(f, y, Y_metadata=Y_metadata)
d2logpdf_df2_dtheta = self.d2logpdf_df2_dtheta(f, y, Y_metadata=Y_metadata)
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
@ -390,7 +394,7 @@ class Likelihood(Parameterized):
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
"""
Compute mean, variance and confidence interval (percentiles 5 and 95) of the prediction.
Compute mean and variance of the predictive distribution.
:param mu: mean of the latent variable, f, of posterior
:param var: variance of the latent variable, f, of posterior
@ -407,10 +411,7 @@ class Likelihood(Parameterized):
#compute the quantiles by sampling!!!
N_samp = 1000
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata)
ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
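The sampling-based quantile computation above, as a standalone sketch (samples is assumed to push latent draws through the likelihood, as self.samples does here):

import numpy as np

def mc_quantiles(mu, var, samples, quantiles, n_samp=1000):
    s = np.random.randn(mu.shape[0], n_samp) * np.sqrt(var) + mu
    ss_y = samples(s)
    return [np.percentile(ss_y, q, axis=1)[:, None] for q in quantiles]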

View file

@ -11,7 +11,7 @@ import itertools
class MixedNoise(Likelihood):
def __init__(self, likelihoods_list, name='mixed_noise'):
#NOTE at the moment this likelihood only works with a list of Gaussian likelihoods
super(Likelihood, self).__init__(name=name)
self.add_parameters(*likelihoods_list)
@ -24,10 +24,11 @@ class MixedNoise(Likelihood):
variance = np.zeros(ind.size)
for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
variance[ind==j] = lik.variance
return variance[:,None]
return variance
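Illustration of the indexing above: with two Gaussian likelihoods of variances 0.1 and 0.4 and output_index [0, 0, 1], the per-point noise vector is [0.1, 0.1, 0.4]:

import numpy as np
ind, variances = np.array([0, 0, 1]), [0.1, 0.4]
out = np.zeros(ind.size)
for j, v in enumerate(variances):
    out[ind == j] = v   # -> [0.1, 0.1, 0.4]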
def betaY(self,Y,Y_metadata):
return Y/self.gaussian_variance(Y_metadata=Y_metadata)
#TODO not here.
return Y/self.gaussian_variance(Y_metadata=Y_metadata)[:,None]
def update_gradients(self, gradients):
self.gradient = gradients
@ -38,34 +39,27 @@ class MixedNoise(Likelihood):
return np.array([dL_dKdiag[ind==i].sum() for i in range(len(self.likelihoods_list))])
def predictive_values(self, mu, var, full_cov=False, Y_metadata=None):
if all([isinstance(l, Gaussian) for l in self.likelihoods_list]):
ind = Y_metadata['output_index'].flatten()
_variance = np.array([self.likelihoods_list[j].variance for j in ind ])
if full_cov:
var += np.eye(var.shape[0])*_variance
else:
var += _variance
return mu, var
ind = Y_metadata['output_index'].flatten()
_variance = np.array([self.likelihoods_list[j].variance for j in ind ])
if full_cov:
var += np.eye(var.shape[0])*_variance
else:
raise NotImplementedError
var += _variance
return mu, var
def predictive_variance(self, mu, sigma, **other_shit):
if isinstance(noise_index,int):
_variance = self.variance[noise_index]
else:
_variance = np.array([ self.variance[j] for j in noise_index ])[:,None]
def predictive_variance(self, mu, sigma, Y_metadata):
_variance = self.gaussian_variance(Y_metadata)
return _variance + sigma**2
def covariance_matrix(self, Y, Y_metadata):
#assert all([isinstance(l, Gaussian) for l in self.likelihoods_list])
#ind = Y_metadata['output_index'].flatten()
#variance = np.zeros(Y.shape[0])
#for lik, j in zip(self.likelihoods_list, range(len(self.likelihoods_list))):
# variance[ind==j] = lik.variance
#return np.diag(variance)
return np.diag(self.gaussian_variance(Y_metadata).flatten())
def predictive_quantiles(self, mu, var, quantiles, Y_metadata):
ind = Y_metadata['output_index'].flatten()
outputs = np.unique(ind)
Q = np.zeros( (mu.size,len(quantiles)) )
for j in outputs:
q = self.likelihoods_list[j].predictive_quantiles(mu[ind==j,:],
var[ind==j,:],quantiles,Y_metadata=None)
Q[ind==j,:] = np.hstack(q)
return [q[:,None] for q in Q.T]
def samples(self, gp, Y_metadata):
"""
@ -84,4 +78,3 @@ class MixedNoise(Likelihood):
_ysim = np.array([np.random.normal(lik.gp_link.transf(gpj), scale=np.sqrt(lik.variance), size=1) for gpj in gp_filtered.flatten()])
Ysim[flt,:] = _ysim.reshape(n1,N2)
return Ysim

View file

@ -25,10 +25,13 @@ class Poisson(Likelihood):
super(Poisson, self).__init__(gp_link, name='Poisson')
def _preprocess_values(self,Y):
return Y
def _conditional_mean(self, f):
"""
The expected value of y given a value of f.
"""
return self.gp_link.transf(f)
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -39,14 +42,14 @@ class Poisson(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.prod(stats.poisson.pmf(y,link_f))
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
@ -57,7 +60,7 @@ class Poisson(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: likelihood evaluated for this point
:rtype: float
@ -65,7 +68,7 @@ class Poisson(Likelihood):
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return np.sum(-link_f + y*np.log(link_f) - special.gammaln(y+1))
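Quick check of the log-pmf above against scipy (illustration only):

import numpy as np
from scipy import stats, special
y, lam = np.array([2.]), np.array([3.])
manual = np.sum(-lam + y*np.log(lam) - special.gammaln(y + 1))
assert np.allclose(manual, np.log(stats.poisson.pmf(y, lam)).sum())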
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -76,7 +79,7 @@ class Poisson(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
@ -84,7 +87,7 @@ class Poisson(Likelihood):
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
return y/link_f - 1
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -97,7 +100,7 @@ class Poisson(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
@ -112,7 +115,7 @@ class Poisson(Likelihood):
#transf = self.gp_link.transf(gp)
#return obs * ((self.gp_link.dtransf_df(gp)/transf)**2 - d2_df/transf) + d2_df
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -123,7 +126,7 @@ class Poisson(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in poisson distribution
:param Y_metadata: Y_metadata which is not used in poisson distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""

View file

@ -46,7 +46,7 @@ class StudentT(Likelihood):
self.sigma2.gradient = grads[0]
self.v.gradient = grads[1]
def pdf_link(self, link_f, y, extra_data=None):
def pdf_link(self, link_f, y, Y_metadata=None):
"""
Likelihood function given link(f)
@ -57,7 +57,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
"""
@ -70,7 +70,7 @@ class StudentT(Likelihood):
)
return np.prod(objective)
def logpdf_link(self, link_f, y, extra_data=None):
def logpdf_link(self, link_f, y, Y_metadata=None):
"""
Log Likelihood Function given link(f)
@ -81,7 +81,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: likelihood evaluated for this point
:rtype: float
@ -99,7 +99,7 @@ class StudentT(Likelihood):
)
return np.sum(objective)
def dlogpdf_dlink(self, link_f, y, extra_data=None):
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
Gradient of the log likelihood function at y, given link(f) w.r.t link(f)
@ -110,7 +110,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: gradient of likelihood evaluated at points
:rtype: Nx1 array
@ -120,7 +120,7 @@ class StudentT(Likelihood):
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad
def d2logpdf_dlink2(self, link_f, y, extra_data=None):
def d2logpdf_dlink2(self, link_f, y, Y_metadata=None):
"""
Hessian at y, given link(f), w.r.t link(f)
i.e. second derivative logpdf at y given link(f_i) and link(f_j) w.r.t link(f_i) and link(f_j)
@ -133,7 +133,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: Diagonal of hessian matrix (second derivative of likelihood evaluated at points f)
:rtype: Nx1 array
@ -146,7 +146,7 @@ class StudentT(Likelihood):
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess
def d3logpdf_dlink3(self, link_f, y, extra_data=None):
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
"""
Third order derivative log-likelihood function at y given link(f) w.r.t link(f)
@ -157,7 +157,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array
"""
@ -168,7 +168,7 @@ class StudentT(Likelihood):
)
return d3lik_dlink3
def dlogpdf_link_dvar(self, link_f, y, extra_data=None):
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
"""
Gradient of the log-likelihood function at y given f, w.r.t variance parameter (t_noise)
@ -179,7 +179,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float
"""
@ -188,7 +188,7 @@ class StudentT(Likelihood):
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
return np.sum(dlogpdf_dvar)
def dlogpdf_dlink_dvar(self, link_f, y, extra_data=None):
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
"""
Derivative of the dlogpdf_dlink w.r.t variance parameter (t_noise)
@ -199,7 +199,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array
"""
@ -208,7 +208,7 @@ class StudentT(Likelihood):
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar
def d2logpdf_dlink2_dvar(self, link_f, y, extra_data=None):
def d2logpdf_dlink2_dvar(self, link_f, y, Y_metadata=None):
"""
Gradient of the hessian (d2logpdf_dlink2) w.r.t variance parameter (t_noise)
@ -219,7 +219,7 @@ class StudentT(Likelihood):
:type link_f: Nx1 array
:param y: data
:type y: Nx1 array
:param extra_data: extra_data which is not used in student t distribution
:param Y_metadata: Y_metadata which is not used in student t distribution
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array
"""
@ -230,25 +230,22 @@ class StudentT(Likelihood):
)
return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, extra_data=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, extra_data=extra_data)
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, extra_data=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, extra_data=extra_data)
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, extra_data=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, extra_data=extra_data)
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None):
"""
Compute mean of the prediction
"""
return self.gp_link.transf(mu) # only true if the link is monotonic, which it is.
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):

View file

@ -3,6 +3,7 @@
import numpy as np
from ..core.mapping import Mapping
from ..core.parameterization import Param
class Linear(Mapping):
"""
@ -16,38 +17,22 @@ class Linear(Mapping):
:type X: ndarray
:param output_dim: dimension of output.
:type output_dim: int
"""
def __init__(self, input_dim=1, output_dim=1):
self.name = 'linear'
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.num_params = self.output_dim*(self.input_dim + 1)
self.W = np.array((self.input_dim, self.output_dim))
self.bias = np.array(self.output_dim)
self.randomize()
def _get_param_names(self):
return sum([['W_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.input_dim)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.W.flatten(), self.bias))
def _set_params(self, x):
self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy()
self.bias = x[self.input_dim*self.output_dim:].copy()
def randomize(self):
self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1)
def __init__(self, input_dim=1, output_dim=1, name='linear_map'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.W = Param('W', np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1))
self.bias = Param('bias', np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1))
self.add_parameters(self.W, self.bias)
def f(self, X):
return np.dot(X,self.W) + self.bias
def df_dtheta(self, dL_df, X):
self._df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dW.flatten(), self._df_dbias))
def df_dX(self, dL_df, X):
return (dL_df[:, None, :]*self.W[None, :, :]).sum(2)
df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
df_dbias = (dL_df.sum(0))
return np.hstack((df_dW.flatten(), df_dbias))
def dL_dX(self, dL_df, X):
return (dL_df[:, None, :]*self.W[None, :, :]).sum(2)
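Usage sketch for the refactored mapping (assuming the Param attributes behave like the arrays they wrap):

import numpy as np
W, bias = np.random.randn(3, 2), np.zeros(2)   # stand-ins for the Params
X = np.random.randn(5, 3)
F = np.dot(X, W) + bias                         # Linear.f: 5x2 outputs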

View file

@ -75,15 +75,19 @@ class BayesianGPLVM(SparseGP):
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
def plot_latent(self, plot_inducing=True, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.dim_reduction_plots.plot_latent
"""
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=True, legend=True,
plot_limits=None,
aspect='auto', updates=False, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
return dim_reduction_plots.plot_latent(self, labels, which_indices,
resolution, ax, marker, s,
fignum, plot_inducing, legend,
plot_limits, aspect, updates, **kwargs)
def do_test_latents(self, Y):
"""

View file

@ -36,7 +36,7 @@ class GPCoregionalizedRegression(GP):
#Kernel
if kernel is None:
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name)
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=1,name=kernel_name)
#Likelihood
likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)

View file

@ -20,14 +20,14 @@ class GPRegression(GP):
"""
def __init__(self, X, Y, kernel=None):
def __init__(self, X, Y, kernel=None, Y_metadata=None):
if kernel is None:
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Gaussian()
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression')
super(GPRegression, self).__init__(X, Y, kernel, likelihood, name='GP regression', Y_metadata=Y_metadata)
def _getstate(self):
return GP._getstate(self)

View file

@ -67,12 +67,22 @@ class GPLVM(GP):
assert self.likelihood.Y.shape[1] == 2
pb.scatter(self.likelihood.Y[:, 0], self.likelihood.Y[:, 1], 40, self.X[:, 0].copy(), linewidth=0, cmap=pb.cm.jet) # @UndefinedVariable
Xnew = np.linspace(self.X.min(), self.X.max(), 200)[:, None]
mu, var, upper, lower = self.predict(Xnew)
mu, _ = self.predict(Xnew)
pb.plot(mu[:, 0], mu[:, 1], 'k', linewidth=1.5)
def plot_latent(self, *args, **kwargs):
def plot_latent(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, legend=True,
plot_limits=None,
aspect='auto', updates=False, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_latent(self, *args, **kwargs)
return dim_reduction_plots.plot_latent(self, labels, which_indices,
resolution, ax, marker, s,
fignum, False, legend,
plot_limits, aspect, updates, **kwargs)
def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs)

View file

@ -43,14 +43,14 @@ class SparseGPCoregionalizedRegression(SparseGP):
#Kernel
if kernel is None:
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=GPy.kern.rbf(X.shape[1]-1), W_rank=1,name=kernel_name)
kernel = util.multioutput.ICM(input_dim=X.shape[1]-1, num_outputs=Ny, kernel=kern.RBF(X.shape[1]-1), W_rank=1,name=kernel_name)
#Likelihood
likelihood = util.multioutput.build_likelihood(Y_list,self.output_index,likelihoods_list)
#Inducing inputs list
if len(Z_list):
assert len(Z_list) == self.output_dim, 'Number of outputs does not match length of inducing inputs list.'
assert len(Z_list) == Ny, 'Number of outputs does not match length of inducing inputs list.'
else:
if isinstance(num_inducing,np.int):
num_inducing = [num_inducing] * Ny

View file

@ -4,7 +4,7 @@
import unittest
import numpy as np
import GPy
class MappingTests(unittest.TestCase):
@ -23,12 +23,11 @@ class MappingTests(unittest.TestCase):
def test_mlpmapping(self):
verbose = False
mapping = GPy.mappings.MLP(input_dim=2, hidden_dim=[3, 4, 8, 2], output_dim=2)
mapping = GPy.mappings.MLP(input_dim=2, hidden_dim=[3, 4, 8, 2], output_dim=2)
self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose))
self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose))
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@@ -30,7 +30,8 @@ def most_significant_input_dimensions(model, which_indices):
def plot_latent(model, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=False, legend=True,
aspect='auto', updates=False):
plot_limits=None,
aspect='auto', updates=False, **kwargs):
"""
:param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
:param resolution: the resolution of the grid on which to evaluate the predictive variance
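A hedged sketch of calling the extended function with explicit limits; a fitted latent variable model m is assumed:

from GPy.plotting.matplot_dep import dim_reduction_plots
# plot_limits takes (xmin, xmax, ymin, ymax); omit it and the limits are
# inferred from the latent points with a 10% margin, as implemented below
dim_reduction_plots.plot_latent(m, which_indices=(0, 1), resolution=50,
                                plot_limits=(-2., 2., -2., 2.))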
@@ -38,6 +39,8 @@ def plot_latent(model, labels=None, which_indices=None,
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
else:
fig = ax.figure
Tango.reset()
if labels is None:
@@ -57,15 +60,28 @@ def plot_latent(model, labels=None, which_indices=None,
def plot_function(x):
Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x
mu, var, low, up = model.predict(Xtest_full)
_, var = model.predict(Xtest_full)
var = var[:, :1]
return np.log(var)
#Create an IMshow controller that can re-plot the latent space shading at a good resolution
if plot_limits is None:
xmin, ymin = X[:, [input_1, input_2]].min(0)
xmax, ymax = X[:, [input_1, input_2]].max(0)
x_r, y_r = xmax-xmin, ymax-ymin
xmin -= .1*x_r
xmax += .1*x_r
ymin -= .1*y_r
ymax += .1*y_r
else:
try:
xmin, xmax, ymin, ymax = plot_limits
except (TypeError, ValueError) as e:
raise e.__class__, "Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits)
view = ImshowController(ax, plot_function,
tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)),
(xmin, ymin, xmax, ymax),
resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.binary)
cmap=pb.cm.binary, **kwargs)
# make sure labels are in order of input:
ulabels = []
@@ -99,18 +115,31 @@ def plot_latent(model, labels=None, which_indices=None,
if not np.all(labels == 1.) and legend:
ax.legend(loc=0, numpoints=1)
#ax.set_xlim(xmin[0], xmax[0])
#ax.set_ylim(xmin[1], xmax[1])
ax.grid(b=False) # remove the grid if present, it doesn't look good
ax.set_aspect('auto') # set a nice aspect ratio
if plot_inducing:
Z = param_to_array(model.Z)
ax.plot(Z[:, input_1], Z[:, input_2], '^w')
ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax))
try:
fig.canvas.draw()
fig.tight_layout()
fig.canvas.draw()
except Exception as e:
print "Could not invoke tight layout: {}".format(e)
pass
if updates:
ax.figure.canvas.show()
try:
ax.figure.canvas.show()
except Exception as e:
print "Could not invoke show: {}".format(e)
raw_input('Enter to continue')
view.deactivate()
return ax
def plot_magnification(model, labels=None, which_indices=None,
@@ -186,7 +215,7 @@ def plot_magnification(model, labels=None, which_indices=None,
ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w')
if updates:
ax.figure.canvas.show()
fig.canvas.show()
raw_input('Enter to continue')
pb.title('Magnification Factor')

View file

@@ -33,7 +33,7 @@ class AxisChangedController(AxisEventController):
Constructor
'''
super(AxisChangedController, self).__init__(ax)
self._lim_ratio_threshold = update_lim or .8
self._lim_ratio_threshold = update_lim or .95
self._x_lim = self.ax.get_xlim()
self._y_lim = self.ax.get_ylim()
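Note that update_lim or .95 treats every falsy value as "use the default", including an explicit 0. A minimal illustration, not part of this commit:

def threshold(update_lim=None):
    return update_lim or .95

assert threshold() == .95    # None falls back to the default
assert threshold(.8) == .8   # an explicit ratio is kept
assert threshold(0) == .95   # 0 also falls back, which may surprise callers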
@@ -80,6 +80,10 @@ class AxisChangedController(AxisEventController):
class BufferedAxisChangedController(AxisChangedController):
def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=None, **kwargs):
"""
Buffered axis-changed controller. Controls the buffer and handles update events when the axes change.
Plotting is updated after the first reload (the first draw stays within the given plot limits; thereafter the limits are buffered).
:param plot_function:
function to use for creating image for plotting (return ndarray-like)
plot_function gets called with (2D!) Xtest grid if replotting required
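A hedged sketch of a conforming plot_function; the surface itself is an arbitrary assumption:

import numpy
def plot_function(Xtest):
    # Xtest is a (resolution**2, 2) grid of test points; return an
    # ndarray-like result that reshapes to (resolution, resolution)
    return numpy.sin(Xtest[:, 0]) * numpy.cos(Xtest[:, 1])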
@@ -91,11 +95,13 @@ class BufferedAxisChangedController(AxisChangedController):
"""
super(BufferedAxisChangedController, self).__init__(ax, update_lim=update_lim)
self.plot_function = plot_function
xmin, xmax = self._x_lim # self._compute_buffered(*self._x_lim)
ymin, ymax = self._y_lim # self._compute_buffered(*self._y_lim)
xmin, ymin, xmax, ymax = plot_limits#self._x_lim # self._compute_buffered(*self._x_lim)
# imshow acts on the limits of the plot, so we override the limits here to make sure the right plot limits are used:
self._x_lim = xmin, xmax
self._y_lim = ymin, ymax
self.resolution = resolution
self._not_init = False
self.view = self._init_view(self.ax, self.recompute_X(), xmin, xmax, ymin, ymax, **kwargs)
self.view = self._init_view(self.ax, self.recompute_X(buffered=False), xmin, xmax, ymin, ymax, **kwargs)
self._not_init = True
def update(self, ax):
@@ -111,14 +117,16 @@ class BufferedAxisChangedController(AxisChangedController):
def update_view(self, view, X, xmin, xmax, ymin, ymax):
raise NotImplementedError('update view given in here')
def get_grid(self):
xmin, xmax = self._compute_buffered(*self._x_lim)
ymin, ymax = self._compute_buffered(*self._y_lim)
def get_grid(self, buffered=True):
if buffered: comp = self._compute_buffered
else: comp = lambda a,b: (a,b)
xmin, xmax = comp(*self._x_lim)
ymin, ymax = comp(*self._y_lim)
x, y = numpy.mgrid[xmin:xmax:1j * self.resolution, ymin:ymax:1j * self.resolution]
return numpy.hstack((x.flatten()[:, None], y.flatten()[:, None]))
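For clarity, the grid construction above in isolation; the 3x3 resolution is an illustrative assumption:

import numpy
xmin, xmax, ymin, ymax, resolution = 0., 1., 0., 1., 3
x, y = numpy.mgrid[xmin:xmax:1j * resolution, ymin:ymax:1j * resolution]
grid = numpy.hstack((x.flatten()[:, None], y.flatten()[:, None]))
# grid.shape == (9, 2): one (x, y) row per pixel, exactly the Xtest
# array that recompute_X hands to plot_function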
def recompute_X(self):
X = self.plot_function(self.get_grid())
def recompute_X(self, buffered=True):
X = self.plot_function(self.get_grid(buffered))
if isinstance(X, (tuple, list)):
for x in X:
x.shape = [self.resolution, self.resolution]

View file

@@ -9,7 +9,7 @@ import numpy
class ImshowController(BufferedAxisChangedController):
def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=.5, **kwargs):
def __init__(self, ax, plot_function, plot_limits, resolution=50, update_lim=.8, **kwargs):
"""
:param plot_function:
function to use for creating image for plotting (return ndarray-like)

View file

@@ -123,6 +123,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#add inducing inputs (if a sparse model is used)
if hasattr(model,"Z"):
#Zu = model.Z[:,free_dims] * model._Xscale[:,free_dims] + model._Xoffset[:,free_dims]
if isinstance(model,SparseGPCoregionalizedRegression):
Z = Z[Z[:,-1] == Y_metadata['output_index'],:]
Zu = Z[:,free_dims]
z_height = ax.get_ylim()[0]
plots['inducing_inputs'] = ax.plot(Zu, np.zeros_like(Zu) + z_height, 'r|', mew=1.5, markersize=12)

View file

@@ -4,6 +4,8 @@ import GPy
import numpy as np
import matplotlib as mpl
import time
from ...util.misc import param_to_array
from GPy.core.parameterization.variational import VariationalPosterior
try:
import visual
visual_available = True
@@ -72,12 +74,13 @@ class vector_show(matplotlib_show):
"""
def __init__(self, vals, axes=None):
matplotlib_show.__init__(self, vals, axes)
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals.T)[0]
self.handle = self.axes.plot(np.arange(0, len(vals))[:, None], self.vals)
def modify(self, vals):
self.vals = vals.copy()
xdata, ydata = self.handle.get_data()
self.handle.set_data(xdata, self.vals.T)
for handle, vals in zip(self.handle, self.vals.T):
xdata, ydata = handle.get_data()
handle.set_data(xdata, vals)
self.axes.figure.canvas.draw()
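The rewrite above relies on matplotlib returning one Line2D handle per plotted column; a minimal check, independent of this commit:

import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
handles = ax.plot(np.arange(3)[:, None], np.random.rand(3, 2))
assert len(handles) == 2  # one handle per column, hence the zip() in modify()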
@@ -91,8 +94,12 @@ class lvm(matplotlib_show):
:param latent_axes: the axes where the latent visualization should be plotted.
"""
if vals == None:
vals = model.X[0]
if isinstance(model.X, VariationalPosterior):
vals = param_to_array(model.X.mean)
else:
vals = param_to_array(model.X)
vals = param_to_array(vals)
matplotlib_show.__init__(self, vals, axes=latent_axes)
if isinstance(latent_axes,mpl.axes.Axes):

GPy/testing/fitc.py (new file, 34 lines)
View file

@@ -0,0 +1,34 @@
# Copyright (c) 2014, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class FITCtest(unittest.TestCase):
def setUp(self):
######################################
# # 1 dimensional example
N = 20
# sample inputs and outputs
self.X1D = np.random.uniform(-3., 3., (N, 1))
self.Y1D = np.sin(self.X1D) + np.random.randn(N, 1) * 0.05
######################################
# # 2 dimensional example
# sample inputs and outputs
self.X2D = np.random.uniform(-3., 3., (N, 2))
self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(N, 1) * 0.05
def test_fitc_1d(self):
m = GPy.models.SparseGPRegression(self.X1D, self.Y1D)
m.inference_method=GPy.inference.latent_function_inference.FITC()
self.assertTrue(m.checkgrad())
def test_fitc_2d(self):
m = GPy.models.SparseGPRegression(self.X2D, self.Y2D)
m.inference_method=GPy.inference.latent_function_inference.FITC()
self.assertTrue(m.checkgrad())
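The pattern these tests exercise, as a standalone sketch; X and Y stand for data as in setUp, and optimize() is the standard GPy model call, assumed unchanged by this commit:

m = GPy.models.SparseGPRegression(X, Y)
m.inference_method = GPy.inference.latent_function_inference.FITC()
m.optimize()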

View file

@@ -94,7 +94,7 @@ class Kern_check_dKdiag_dX(Kern_check_dK_dX):
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False):
def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verbose=False, fixed_X_dims=None):
"""
This function runs on kernels to check the correctness of their
implementation. It checks that the covariance function is positive definite
@@ -109,19 +109,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
"""
pass_checks = True
if X==None:
if X is None:
X = np.random.randn(10, kern.input_dim)
if output_ind is not None:
X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
if X2==None:
if X2 is None:
X2 = np.random.randn(20, kern.input_dim)
if output_ind is not None:
X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
if verbose:
print("Checking covariance function is positive definite.")
#if isinstance(kern, GPy.kern.IndependentOutputs):
#import ipdb; ipdb.set_trace() # XXX BREAKPOINT
result = Kern_check_model(kern, X=X).is_positive_semi_definite()
if result and verbose:
print("Check passed.")
@@ -154,7 +152,12 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if verbose:
print("Checking gradients of Kdiag(X) wrt theta.")
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
try:
result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
print("update_gradients_diag not implemented for " + kern.name)
if result and verbose:
print("Check passed.")
if not result:
@@ -166,7 +169,10 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if verbose:
print("Checking gradients of K(X, X) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
testmodel = Kern_check_dK_dX(kern, X=X, X2=None)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
@@ -175,14 +181,17 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
testmodel.checkgrad(verbose=True)
pass_checks = False
return False
if verbose:
print("Checking gradients of K(X, X2) wrt X.")
try:
result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
testmodel = Kern_check_dK_dX(kern, X=X, X2=X2)
if fixed_X_dims is not None:
testmodel.X[:,fixed_X_dims].fix()
result = testmodel.checkgrad(verbose=verbose)
except NotImplementedError:
result=True
if verbose:
@@ -190,8 +199,8 @@ def check_kernel_gradient_functions(kern, X=None, X2=None, output_ind=None, verb
if result and verbose:
print("Check passed.")
if not result:
print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
testmodel.checkgrad(verbose=True)
pass_checks = False
return False
@@ -236,9 +245,22 @@ class KernelGradientTestsContinuous(unittest.TestCase):
def test_Add(self):
k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k.randomize()
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
def test_Add_dims(self):
k = GPy.kern.Matern32(2, active_dims=[2,self.D]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k.randomize()
self.assertRaises(AssertionError, k.K, self.X)
k = GPy.kern.Matern32(2, active_dims=[2,self.D-1]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
k.randomize()
# assert it runs:
try:
k.K(self.X)
except AssertionError:
raise AssertionError, "k.K(X) should run on self.D-1 dimension"
def test_Matern52(self):
k = GPy.kern.Matern52(self.D)
k.randomize()
@@ -302,29 +324,57 @@ class KernelTestsMiscellaneous(unittest.TestCase):
class KernelTestsNonContinuous(unittest.TestCase):
def setUp(self):
N = 100
N1 = 110
self.D = 2
D = self.D
self.X = np.random.randn(N,D)
self.X2 = np.random.randn(N1,D)
#self.X_block = np.zeros((N+N1, D+D+1))
#self.X_block[0:N, 0:D] = self.X
#self.X_block[N:N+N1, D:D+D] = self.X2
#self.X_block[0:N, -1] = 0
#self.X_block[N:N+N1, -1] = 1
self.X_block = np.zeros((N+N1, D+1))
self.X_block[0:N, 0:D] = self.X
self.X_block[N:N+N1, 0:D] = self.X2
self.X_block[0:N, -1] = 0
self.X_block[N:N+N1, -1] = 1
self.X_block = self.X_block[self.X_block.argsort(0)[:, -1], :]
N0 = 3
N1 = 9
N2 = 4
N = N0+N1+N2
self.D = 3
self.X = np.random.randn(N, self.D+1)
indices = np.random.random_integers(0, 2, size=N)
self.X[indices==0, -1] = 0
self.X[indices==1, -1] = 1
self.X[indices==2, -1] = 2
#self.X = self.X[self.X[:, -1].argsort(), :]
self.X2 = np.random.randn((N0+N1)*2, self.D+1)
self.X2[:(N0*2), -1] = 0
self.X2[(N0*2):, -1] = 1
def test_IndependentOutputs(self):
k = GPy.kern.RBF(self.D)
kern = GPy.kern.IndependentOutputs(k, -1)
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X_block, verbose=verbose))
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(self.D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
self.assertTrue(check_kernel_gradient_functions(kern, X=self.X, X2=self.X2, verbose=verbose, fixed_X_dims=-1))
def test_ODE_UY(self):
kern = GPy.kern.ODE_UY(2, active_dims=[0, self.D])
X = self.X[self.X[:,-1]!=2]
X2 = self.X2[self.X2[:,-1]!=2]
self.assertTrue(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()
#unittest.main()
np.random.seed(0)
N0 = 3
N1 = 9
N2 = 4
N = N0+N1+N2
D = 3
X = np.random.randn(N, D+1)
indices = np.random.random_integers(0, 2, size=N)
X[indices==0, -1] = 0
X[indices==1, -1] = 1
X[indices==2, -1] = 2
#X = X[X[:, -1].argsort(), :]
X2 = np.random.randn((N0+N1)*2, D+1)
X2[:(N0*2), -1] = 0
X2[(N0*2):, -1] = 1
k = [GPy.kern.RBF(1, active_dims=[1], name='rbf1'), GPy.kern.RBF(D, name='rbf012'), GPy.kern.RBF(2, active_dims=[0,2], name='rbf02')]
kern = GPy.kern.IndependentOutputs(k, -1, name='ind_split')
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))
k = GPy.kern.RBF(D)
kern = GPy.kern.IndependentOutputs(k, -1, 'ind_single')
assert(check_kernel_gradient_functions(kern, X=X, X2=X2, verbose=verbose, fixed_X_dims=-1))

View file

@@ -255,21 +255,21 @@ class TestNoiseModels(object):
"Y": self.binary_Y,
"ep": False # FIXME: Should be True when we have it working again
},
#"Exponential_default": {
#"model": GPy.likelihoods.exponential(),
#"link_f_constraints": [constrain_positive],
#"Y": self.positive_Y,
#"laplace": True,
#},
#"Poisson_default": {
#"model": GPy.likelihoods.poisson(),
#"link_f_constraints": [constrain_positive],
#"Y": self.integer_Y,
#"laplace": True,
#"ep": False #Should work though...
#},
#"Gamma_default": {
#"model": GPy.likelihoods.gamma(),
"Exponential_default": {
"model": GPy.likelihoods.Exponential(),
"link_f_constraints": [constrain_positive],
"Y": self.positive_Y,
"laplace": True,
},
"Poisson_default": {
"model": GPy.likelihoods.Poisson(),
"link_f_constraints": [constrain_positive],
"Y": self.integer_Y,
"laplace": True,
"ep": False #Should work though...
}#,
#GAMMA needs some work!"Gamma_default": {
#"model": GPy.likelihoods.Gamma(),
#"link_f_constraints": [constrain_positive],
#"Y": self.positive_Y,
#"laplace": True
@@ -589,7 +589,8 @@ class LaplaceTests(unittest.TestCase):
self.var = np.random.rand(1)
self.stu_t = GPy.likelihoods.StudentT(deg_free=5, sigma2=self.var)
self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
#TODO: Gaussians with a non-Identity link. self.gauss = GPy.likelihoods.Gaussian(gp_link=link_functions.Log(), variance=self.var)
self.gauss = GPy.likelihoods.Gaussian(variance=self.var)
#Make a bigger step as lower bound can be quite curved
self.step = 1e-6
@@ -604,7 +605,6 @@ class LaplaceTests(unittest.TestCase):
def test_gaussian_d2logpdf_df2_2(self):
print "\n{}".format(inspect.stack()[0][3])
self.Y = None
self.gauss = None
self.N = 2
self.D = 1
@@ -613,7 +613,6 @@ class LaplaceTests(unittest.TestCase):
noise = np.random.randn(*self.X.shape)*self.real_std
self.Y = np.sin(self.X*2*np.pi) + noise
self.f = np.random.rand(self.N, 1)
self.gauss = GPy.likelihoods.Gaussian(variance=self.var)
dlogpdf_df = functools.partial(self.gauss.dlogpdf_df, y=self.Y)
d2logpdf_df2 = functools.partial(self.gauss.d2logpdf_df2, y=self.Y)

View file

@@ -6,6 +6,60 @@ import unittest
import numpy as np
import GPy
class MiscTests(unittest.TestCase):
def setUp(self):
self.N = 20
self.N_new = 50
self.D = 1
self.X = np.random.uniform(-3., 3., (self.N, 1))
self.Y = np.sin(self.X) + np.random.randn(self.N, self.D) * 0.05
self.X_new = np.random.uniform(-3., 3., (self.N_new, 1))
def test_raw_predict(self):
k = GPy.kern.RBF(1)
m = GPy.models.GPRegression(self.X, self.Y, kernel=k)
m.randomize()
Kinv = np.linalg.pinv(k.K(self.X) + np.eye(self.N)*m.Gaussian_noise.variance)
K_hat = k.K(self.X_new) - k.K(self.X_new, self.X).dot(Kinv).dot(k.K(self.X, self.X_new))
mu_hat = k.K(self.X_new, self.X).dot(Kinv).dot(self.Y)
mu, covar = m._raw_predict(self.X_new, full_cov=True)
self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(covar.shape, (self.N_new, self.N_new))
np.testing.assert_almost_equal(K_hat, covar)
np.testing.assert_almost_equal(mu_hat, mu)
mu, var = m._raw_predict(self.X_new)
self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(var.shape, (self.N_new, 1))
np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
np.testing.assert_almost_equal(mu_hat, mu)
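The closed-form quantities this test reconstructs by hand are the standard GP predictive mean and covariance; as a reference, assuming a Gaussian likelihood with noise variance sigma^2:

$$
\mu_* = K_{*x}(K_{xx} + \sigma^2 I)^{-1} Y, \qquad
\Sigma_* = K_{**} - K_{*x}(K_{xx} + \sigma^2 I)^{-1} K_{x*}
$$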
def test_sparse_raw_predict(self):
k = GPy.kern.RBF(1)
m = GPy.models.SparseGPRegression(self.X, self.Y, kernel=k)
m.randomize()
Z = m.Z[:]
X = self.X[:]
#Not easy to check if woodbury_inv is correct in itself as it requires a large derivation and expression
Kinv = m.posterior.woodbury_inv
K_hat = k.K(self.X_new) - k.K(self.X_new, Z).dot(Kinv).dot(k.K(Z, self.X_new))
mu, covar = m._raw_predict(self.X_new, full_cov=True)
self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(covar.shape, (self.N_new, self.N_new))
np.testing.assert_almost_equal(K_hat, covar)
#np.testing.assert_almost_equal(mu_hat, mu)
mu, var = m._raw_predict(self.X_new)
self.assertEquals(mu.shape, (self.N_new, self.D))
self.assertEquals(var.shape, (self.N_new, 1))
np.testing.assert_almost_equal(np.diag(K_hat)[:, None], var)
#np.testing.assert_almost_equal(mu_hat, mu)
class GradientTests(unittest.TestCase):
def setUp(self):
######################################
@@ -198,6 +252,7 @@ class GradientTests(unittest.TestCase):
m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k)
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_GP_EP_probit(self):
N = 20
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
@@ -207,6 +262,7 @@ class GradientTests(unittest.TestCase):
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_sparse_EP_DTC_probit(self):
N = 20
X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None]
@@ -221,6 +277,7 @@ class GradientTests(unittest.TestCase):
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@unittest.expectedFailure
def test_generalized_FITC(self):
N = 20
X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None]

View file

@@ -8,7 +8,7 @@ from GPy.core.parameterization.parameterized import Parameterized
from GPy.core.parameterization.param import Param
import numpy
# One trigger in init
_trigger_start = -1
class ParamTestParent(Parameterized):
@@ -21,11 +21,9 @@ class ParameterizedTest(Parameterized):
params_changed_count = _trigger_start
def parameters_changed(self):
self.params_changed_count += 1
def _set_params(self, params, trigger_parent=True):
Parameterized._set_params(self, params, trigger_parent=trigger_parent)
class Test(unittest.TestCase):
def setUp(self):
self.parent = ParamTestParent('test parent')
self.par = ParameterizedTest('test model')
@@ -41,12 +39,12 @@ class Test(unittest.TestCase):
self.parent.add_parameter(self.par)
self.parent.add_parameter(self.par2)
self._observer_triggered = None
self._trigger_count = 0
self._first = None
self._second = None
def _trigger(self, which):
self._observer_triggered = float(which)
self._trigger_count += 1
@@ -54,18 +52,18 @@
self._second = self._trigger
else:
self._first = self._trigger
def _trigger_priority(self, which):
if self._first is not None:
self._second = self._trigger_priority
else:
self._first = self._trigger_priority
def test_observable(self):
self.par.add_observer(self, self._trigger, -1)
self.assertEqual(self.par.params_changed_count, 0, 'no params changed yet')
self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
self.p[0,1] = 3 # trigger observers
self.assertEqual(self._observer_triggered, 3, 'observer should have triggered')
self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
@@ -78,14 +76,14 @@
self.assertEqual(self._trigger_count, 1, 'observer should have triggered once')
self.assertEqual(self.par.params_changed_count, 2, 'params changed second')
self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
self.par.add_observer(self, self._trigger, -1)
self.p[2,1] = 4
self.assertEqual(self._observer_triggered, 4, 'observer should have triggered')
self.assertEqual(self._trigger_count, 2, 'observer should have triggered once')
self.assertEqual(self.par.params_changed_count, 3, 'params changed second')
self.assertEqual(self.par.params_changed_count, self.parent.parent_changed_count, 'parent should be triggered as often as param')
self.par.remove_observer(self, self._trigger)
self.p[0,1] = 3
self.assertEqual(self._observer_triggered, 4, 'observer should not have triggered')
@@ -99,7 +97,7 @@
self.par._trigger_params_changed()
self.assertEqual(self.par.params_changed_count, 1, 'now params changed')
self.assertEqual(self.parent.parent_changed_count, self.par.params_changed_count)
self.par._param_array_[:] = 2
self.par._trigger_params_changed()
self.assertEqual(self.par.params_changed_count, 2, 'now params changed')
@@ -125,13 +123,13 @@
self.par.remove_observer(self)
self._first = self._second = None
self.par.add_observer(self, self._trigger, 1)
self.par.add_observer(self, self._trigger_priority, 0)
self.par.notify_observers(0)
self.assertEqual(self._first, self._trigger, 'priority should be second')
self.assertEqual(self._second, self._trigger_priority, 'priority should be second')
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.testName']

View file

@@ -7,16 +7,16 @@ import unittest
import GPy
import numpy as np
from GPy.core.parameterization.parameter_core import HierarchyError
from GPy.core.parameterization.array_core import ObservableArray
from GPy.core.parameterization.array_core import ObsAr
class ArrayCoreTest(unittest.TestCase):
def setUp(self):
self.X = np.random.normal(1,1, size=(100,10))
self.obsX = ObservableArray(self.X)
self.obsX = ObsAr(self.X)
def test_init(self):
X = ObservableArray(self.X)
X2 = ObservableArray(X)
X = ObsAr(self.X)
X2 = ObsAr(X)
self.assertIs(X, X2, "no new Observable array, when Observable is given")
def test_slice(self):
@@ -34,9 +34,9 @@ class ParameterizedTest(unittest.TestCase):
self.param = Param('param', np.random.rand(25,2), Logistic(0, 1))
self.test1 = GPy.core.Parameterized("test model")
self.test1.add_parameter(self.white)
self.test1.add_parameter(self.rbf, 0)
self.test1.add_parameter(self.param)
self.test1.kern = self.rbf+self.white
self.test1.add_parameter(self.test1.kern)
self.test1.add_parameter(self.param, 0)
x = np.linspace(-2,6,4)[:,None]
y = np.sin(x)
@@ -45,22 +45,24 @@
def test_add_parameter(self):
self.assertEquals(self.rbf._parent_index_, 0)
self.assertEquals(self.white._parent_index_, 1)
self.assertEquals(self.param._parent_index_, 0)
pass
def test_fixes(self):
self.white.fix(warning=False)
self.test1.remove_parameter(self.test1.param)
self.test1.remove_parameter(self.param)
self.assertTrue(self.test1._has_fixes())
from GPy.core.parameterization.transformations import FIXED, UNFIXED
self.assertListEqual(self.test1._fixes_.tolist(),[UNFIXED,UNFIXED,FIXED])
self.test1.add_parameter(self.white, 0)
self.test1.kern.add_parameter(self.white, 0)
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED,UNFIXED,UNFIXED])
self.test1.kern.rbf.fix()
self.assertListEqual(self.test1._fixes_.tolist(),[FIXED]*3)
def test_remove_parameter(self):
from GPy.core.parameterization.transformations import FIXED, UNFIXED, __fixed__, Logexp
self.white.fix()
self.test1.remove_parameter(self.white)
self.test1.kern.remove_parameter(self.white)
self.assertIs(self.test1._fixes_,None)
self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
@@ -81,7 +83,12 @@
self.assertListEqual(self.white._fixes_.tolist(), [FIXED])
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertIs(self.test1.constraints, self.param.constraints._param_index_ops)
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), [0,1])
self.assertListEqual(self.test1.constraints[Logexp()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
def test_remove_parameter_param_array_grad_array(self):
val = self.test1.kern._param_array_.copy()
self.test1.kern.remove_parameter(self.white)
self.assertListEqual(self.test1.kern._param_array_.tolist(), val[:2].tolist())
def test_add_parameter_already_in_hirarchy(self):
self.assertRaises(HierarchyError, self.test1.add_parameter, self.white._parameters_[0])
@@ -91,34 +98,51 @@
self.assertIs(self.test1.constraints, self.rbf.constraints._param_index_ops)
self.assertListEqual(self.rbf.constraints.indices()[0].tolist(), range(2))
from GPy.core.parameterization.transformations import Logexp
kern = self.rbf+self.white
kern = self.test1.kern
self.test1.remove_parameter(kern)
self.assertListEqual(kern.constraints[Logexp()].tolist(), range(3))
def test_constraints(self):
self.rbf.constrain(GPy.transformations.Square(), False)
self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(2))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [2])
self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), range(self.param.size, self.param.size+self.rbf.size))
self.assertListEqual(self.test1.constraints[GPy.transformations.Logexp()].tolist(), [self.param.size+self.rbf.size])
self.test1.remove_parameter(self.rbf)
self.test1.kern.remove_parameter(self.rbf)
self.assertListEqual(self.test1.constraints[GPy.transformations.Square()].tolist(), [])
def test_constraints_views(self):
self.assertEqual(self.white.constraints._offset, 2)
self.assertEqual(self.rbf.constraints._offset, 0)
self.assertEqual(self.param.constraints._offset, 3)
self.assertEqual(self.white.constraints._offset, self.param.size+self.rbf.size)
self.assertEqual(self.rbf.constraints._offset, self.param.size)
self.assertEqual(self.param.constraints._offset, 0)
def test_fixing_randomize(self):
self.white.fix(warning=False)
val = float(self.test1.white.variance)
self.white.fix(warning=True)
val = float(self.white.variance)
self.test1.randomize()
self.assertEqual(val, self.white.variance)
def test_randomize(self):
ps = self.test1.param.view(np.ndarray).copy()
self.test1.param.randomize()
self.assertFalse(np.all(ps==self.test1.param))
def test_fixing_randomize_parameter_handling(self):
self.rbf.fix(warning=True)
val = float(self.rbf.variance)
self.test1.kern.randomize()
self.assertEqual(val, self.rbf.variance)
def test_fixing_optimize(self):
self.testmodel.kern.lengthscale.fix()
val = float(self.testmodel.kern.lengthscale)
self.testmodel.randomize()
self.assertEqual(val, self.testmodel.kern.lengthscale)
def test_printing(self):
print self.test1
print self.param
print self.test1['']
if __name__ == "__main__":
#import sys;sys.argv = ['', 'Test.test_add_parameter']
unittest.main()

View file

@@ -56,8 +56,6 @@ def ICM(input_dim, num_outputs, kernel, W_rank=1,W=None,kappa=None,name='X'):
warnings.warn("kernel's input dimension overwritten to fit input_dim parameter.")
K = kernel.prod(GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B'),name=name)
#K = kernel * GPy.kern.Coregionalize(1, num_outputs, active_dims=[input_dim], rank=W_rank,W=W,kappa=kappa,name='B')
#K = kernel ** GPy.kern.Coregionalize(input_dim, num_outputs,W_rank,W,kappa, name= 'B')
K['.*variance'] = 1.
K['.*variance'].fix()
return K
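A short sketch of the regex-style parameter access used above, applied to a standalone kernel; the sum kernel is an illustrative assumption:

import GPy
k = GPy.kern.RBF(1) + GPy.kern.White(1)
k['.*variance'] = 1.   # assign every parameter whose name matches the pattern
k['.*variance'].fix()  # and fix the matched parameters during optimization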

View file

@@ -18,7 +18,7 @@ setup(name = 'GPy',
license = "BSD 3-clause",
keywords = "machine-learning gaussian-processes kernels",
url = "http://sheffieldml.github.com/GPy/",
packages = ['GPy', 'GPy.core', 'GPy.kern', 'GPy.util', 'GPy.models', 'GPy.inference', 'GPy.examples', 'GPy.likelihoods', 'GPy.testing', 'GPy.util.latent_space_visualizations', 'GPy.util.latent_space_visualizations.controllers', 'GPy.likelihoods.noise_models', 'GPy.kern.parts', 'GPy.mappings'],
packages = ["GPy.models", "GPy.inference.optimization", "GPy.inference", "GPy.inference.latent_function_inference", "GPy.likelihoods", "GPy.mappings", "GPy.examples", "GPy.core.parameterization", "GPy.core", "GPy.testing", "GPy", "GPy.util", "GPy.kern", "GPy.kern._src.psi_comp", "GPy.kern._src", "GPy.plotting.matplot_dep.latent_space_visualizations.controllers", "GPy.plotting.matplot_dep.latent_space_visualizations", "GPy.plotting.matplot_dep", "GPy.plotting"],
package_dir={'GPy': 'GPy'},
package_data = {'GPy': ['GPy/examples']},
py_modules = ['GPy.__init__'],
@@ -29,6 +29,4 @@ setup(name = 'GPy',
},
classifiers=[
"License :: OSI Approved :: BSD License"],
#ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
# sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
)