Implemented Mapping framework and associated linear and kernel mappings. This is needed for mean functions, back constrained GPLVM and the non-stationary Gibbs kernel.

This commit is contained in:
Neil Lawrence 2013-08-28 13:51:50 +02:00
parent 84b7156d23
commit d31b5a7c55
12 changed files with 353 additions and 12 deletions

View file

@ -5,6 +5,7 @@ warnings.filterwarnings("ignore", category=DeprecationWarning)
import core
import models
import mappings
import inference
import util
import examples

View file

@ -8,3 +8,4 @@ from gp import GP
from sparse_gp import SparseGP
from fitc import FITC
from svigp import SVIGP
from mapping import *

191
GPy/core/mapping.py Normal file
View file

@ -0,0 +1,191 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from ..util.plot import Tango, x_frame1D, x_frame2D
import pylab as pb
from GPy.core.parameterized import Parameterized
class Mapping(Parameterized):
"""
Base model for shared behavior between models that can act like a mapping.
"""
def __init__(self, input_dim, output_dim):
self.input_dim = input_dim
self.output_dim = output_dim
super(Mapping, self).__init__()
# Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
def f(self, X):
raise NotImplementedError
def df_dX(self, dL_df, X):
"""Evaluate derivatives of mapping outputs with respect to inputs.
:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)
:param X: the input locations where derivatives are to be evaluated.
:type X: ndarray (num_data x input_dim)
:returns: matrix containing gradients of the function with respect to the inputs.
"""
raise NotImplementedError
def df_dtheta(self, dL_df, X):
"""The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters.
:param dL_df: gradient of the objective with respect to the function.
:type dL_df: ndarray (num_data x output_dim)
:param X: input locations where the function is evaluated.
:type X: ndarray (num_data x input_dim)
:returns: Matrix containing gradients with respect to parameters of each output for each input data.
:rtype: ndarray (num_params length)
"""
raise NotImplementedError
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
"""
Plot the mapping.
Plots the mapping associated with the model.
- In one dimension, the function is plotted.
- In two dimsensions, a contour-plot shows the function
- In higher dimensions, we've not implemented this yet !TODO!
Can plot only part of the data and part of the posterior functions
using which_data and which_functions
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param levels: for 2D plotting, the number of contour levels to use
is ax is None, create a new figure
"""
# TODO include samples
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
plotdims = self.input_dim - len(fixed_inputs)
if plotdims == 1:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,freedim] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
f = self.predict(Xgrid, which_parts=which_parts)
for d in range(y.shape[1]):
ax.plot(Xnew, f[:, d], edgecol=linecol)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
f = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(x, y, f, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
from GPy.core.model import Model
class Mapping_check_model(Model):
"""This is a dummy model class used as a base class for checking that the gradients of a given mapping are implemented correctly. It enables checkgradient() to be called independently on each mapping."""
def __init__(self, mapping=None, dL_df=None, X=None):
num_samples = 20
if mapping==None:
mapping = GPy.mapping.linear(1, 1)
if X==None:
X = np.random.randn(num_samples, mapping.input_dim)
if dL_df==None:
dL_df = np.ones((num_samples, mapping.output_dim))
self.mapping=mapping
self.X = X
self.dL_df = dL_df
self.num_params = self.mapping.num_params
Model.__init__(self)
def _get_params(self):
return self.mapping._get_params()
def _get_param_names(self):
return self.mapping._get_param_names()
def _set_params(self, x):
self.mapping._set_params(x)
def log_likelihood(self):
return (self.dL_df*self.mapping.f(self.X)).sum()
def _log_likelihood_gradients(self):
raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class."
class Mapping_check_df_dtheta(Mapping_check_model):
"""This class allows gradient checks for the gradient of a mapping with respect to parameters. """
def __init__(self, mapping=None, dL_df=None, X=None):
Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X)
def _log_likelihood_gradients(self):
return self.mapping.df_dtheta(self.dL_df, self.X)
class Mapping_check_df_dX(Mapping_check_model):
"""This class allows gradient checks for the gradient of a mapping with respect to X. """
def __init__(self, mapping=None, dL_df=None, X=None):
Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X)
if dL_df==None:
dL_df = np.ones((self.X.shape[0],self.mapping.output_dim))
self.num_params = self.X.shape[0]*self.mapping.input_dim
def _log_likelihood_gradients(self):
return self.mapping.df_dX(self.dL_df, self.X).flatten()
def _get_param_names(self):
return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
def _get_params(self):
return self.X.flatten()
def _set_params(self, x):
self.X=x.reshape(self.X.shape)

View file

@ -21,9 +21,12 @@ class Parameterized(object):
self.constraints = []
def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the Model class"
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _set_params(self, x):
raise NotImplementedError, "this needs to be implemented to use the Model class"
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _get_param_names(self):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def pickle(self, filename, protocol=None):
if protocol is None:
@ -162,7 +165,7 @@ class Parameterized(object):
return len(self._get_params()) - removed
def unconstrain(self, regexp):
"""Unconstrain matching parameters. does not untie parameters"""
"""Unconstrain matching parameters. Does not untie parameters"""
matches = self.grep_param_names(regexp)
# tranformed contraints:
@ -227,10 +230,11 @@ class Parameterized(object):
"""
Arguments
---------
:param regexp: np.array(dtype=int), or regular expression object or string
:param value: a float to fix the matched values to. If the value is not specified,
:param regexp: which parameters need to be fixed.
:type regexp: ndarray(dtype=int) or regular expression object or string
:param value: the vlaue to fix the parameters to. If the value is not specified,
the parameter is fixed to the current value
:type value: float
Notes
-----
Fixing a parameter which is tied to another, or constrained in some way will result in an error.

View file

@ -378,6 +378,21 @@ def stick(kernel=None):
return m
def bcgplvm_stick(kernel=None):
data = GPy.util.datasets.osu_run1()
# optimize
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel)
m.optimize(messages=1, max_f_eval=10000)
if GPy.util.visualize.visual_available:
plt.clf
ax = m.plot_latent()
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish')
return m
def robot_wireless():
data = GPy.util.datasets.robot_wireless()
# optimize

7
GPy/mappings/__init__.py Normal file
View file

@ -0,0 +1,7 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernel_mapping import Kernel
from linear_mapping import Linear
#from mlp_mapping import MLP
#from rbf_mapping import RBF

View file

@ -0,0 +1,60 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import Mapping
from .. import kern
class Kernel(Mapping):
"""
Mapping based on a kernel/covariance function.
.. math::
f(\mathbf{x}*) = \mathbf{A}\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b}
:param X: input observations containing :math:`\mathbf{X}`
:type X: ndarray
:param output_dim: dimension of output.
:type output_dim: int
:param kernel: a GPy kernel, defaults to GPy.kern.rbf
:type kernel: GPy.kern.kern
"""
def __init__(self, X, output_dim=1, kernel=None):
Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim)
if kernel is None:
kernel = kern.rbf(self.input_dim)
self.kern = kernel
self.X = X
self.num_data = X.shape[0]
self.num_params = self.output_dim*(self.num_data + 1)
self.A = np.array((self.num_data, self.output_dim))
self.bias = np.array(self.output_dim)
self.randomize()
def _get_param_names(self):
return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.A.flatten(), self.bias))
def _set_params(self, x):
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
self.bias = x[self.num_data*self.output_dim:]
def randomize(self):
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)
def f(self, X):
return np.dot(self.kern.K(X, self.X),self.A) + self.bias
def df_dtheta(self, dL_df, X):
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def df_dX(self, dL_df, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)

View file

@ -0,0 +1,53 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import Mapping
from .. import kern
class Linear(Mapping):
"""
Mapping based on a linear model.
.. math::
f(\mathbf{x}*) = \mathbf{W}\mathbf{x}^* + \mathbf{b}
:param X: input observations
:type X: ndarray
:param output_dim: dimension of output.
:type output_dim: int
"""
def __init__(self, input_dim=1, output_dim=1):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.num_params = self.output_dim*(self.input_dim + 1)
self.W = np.array((self.input_dim, self.output_dim))
self.bias = np.array(self.output_dim)
self.randomize()
def _get_param_names(self):
return sum([['W_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.input_dim)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.W.flatten(), self.bias))
def _set_params(self, x):
self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy()
self.bias = x[self.input_dim*self.output_dim:]
def randomize(self):
self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1)
def f(self, X):
return np.dot(X,self.W) + self.bias
def df_dtheta(self, dL_df, X):
self._df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dW.flatten(), self._df_dbias))
def df_dX(self, dL_df, X):
return (dL_df[:, None, :]*self.W[None, :, :]).sum(2)

View file

@ -8,6 +8,7 @@ from svigp_regression import SVIGPRegression
from sparse_gp_classification import SparseGPClassification
from fitc_classification import FITCClassification
from gplvm import GPLVM
from bcgplvm import BCGPLVM
from sparse_gplvm import SparseGPLVM
from warped_gp import WarpedGP
from bayesian_gplvm import BayesianGPLVM

View file

@ -30,8 +30,8 @@ class GPLVM(GP):
if X is None:
X = self.initialise_latent(init, input_dim, Y)
if kernel is None:
kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2)) + kern.white(input_dim, np.exp(-2))
likelihood = Gaussian(Y, normalize=normalize_Y)
kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2))
likelihood = Gaussian(Y, normalize=normalize_Y, variance=np.exp(-2.))
GP.__init__(self, X, likelihood, kernel, normalize_X=False)
self.ensure_default_constraints()

View file

@ -54,3 +54,11 @@ Need to check for nan values in likelihoods. These should be treated as missing
Sometimes you want to print kernpart objects, for diagnosis etc. This isn't possible currently.
Why do likelihoods still have YYT everywhere, didn't we agree to set observed data to Y and latent function to F?
For some reason a stub of _get_param_names(self) wasn't available in the Parameterized base class. Have put it in (is this right?)
Is there a quick FAQ or something on how to build the documentation? I did it once, but can't remember!
Similar for the nosetests ... even ran them last week but can't remember the command!

View file

@ -21,10 +21,10 @@ class KernelTests(unittest.TestCase):
def test_rbfkernel(self):
verbose = False
kern = GPy.kern.rbf(5)
self.assertTrue(Kern_check_model(kern).is_positive_definite())
self.assertTrue(Kern_check_dK_dtheta(kern).checkgrad(verbose=verbose))
self.assertTrue(Kern_check_dKdiag_dtheta(kern).checkgrad(verbose=verbose))
self.assertTrue(Kern_check_dK_dX(kern).checkgrad(verbose=verbose))
self.assertTrue(GPy.kern.Kern_check_model(kern).is_positive_definite())
self.assertTrue(GPy.kern.Kern_check_dK_dtheta(kern).checkgrad(verbose=verbose))
self.assertTrue(GPy.kern.Kern_check_dKdiag_dtheta(kern).checkgrad(verbose=verbose))
self.assertTrue(GPy.kern.Kern_check_dK_dX(kern).checkgrad(verbose=verbose))
def test_fixedkernel(self):
"""