Added mlp mapping (with possibility of having multiple layers).

This commit is contained in:
Neil Lawrence 2013-08-29 19:26:00 +02:00
parent e6739788ea
commit 128ebbabc5
12 changed files with 187 additions and 17 deletions

View file

@ -6,7 +6,6 @@ import numpy as np
import pylab as pb
from .. import kern
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
# from ..util.plot import gpplot, Tango
from ..likelihoods import EP
from gp_base import GPBase

View file

@ -1,11 +1,10 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from ..util.plot import Tango, x_frame1D, x_frame2D
from parameterized import Parameterized
import numpy as np
import pylab as pb
from GPy.core.parameterized import Parameterized
class Mapping(Parameterized):
"""

View file

@ -397,7 +397,7 @@ def bcgplvm_linear_stick(kernel=None):
def bcgplvm_stick(kernel=None):
data = GPy.util.datasets.osu_run1()
# optimize
back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=10.)
back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
m.optimize(messages=1, max_f_eval=10000)

View file

@ -69,6 +69,40 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
return kern(input_dim, [part])
# def gibbs(input_dim,variance=1., mapping=None):
# """
# Gibbs and MacKay non-stationary covariance function.
# .. math::
# r = sqrt((x_i - x_j)'*(x_i - x_j))
# k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
# Z = \sqrt{2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')}
# where :math:`l(x)` is a function giving the length scale as a function of space.
# This is the non stationary kernel proposed by Mark Gibbs in his 1997
# thesis. It is similar to an RBF but has a length scale that varies
# with input location. This leads to an additional term in front of
# the kernel.
# The parameters are :math:`\sigma^2`, the process variance, and the parameters of l(x) which is a function that can be specified by the user, by default an multi-layer peceptron is used is used.
# :param input_dim: the number of input dimensions
# :type input_dim: int
# :param variance: the variance :math:`\sigma^2`
# :type variance: float
# :param mapping: the mapping that gives the lengthscale across the input space.
# :type mapping: GPy.core.Mapping
# :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
# :type ARD: Boolean
# :rtype: Kernpart object
# """
# part = parts.gibbs.Gibbs(input_dim,variance,mapping)
# return kern(input_dim, [part])
def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
"""
Construct a polynomial kernel
@ -312,10 +346,10 @@ def coregionalise(output_dim, rank=1, W=None, kappa=None):
Used for computing covariance functions of the form
.. math::
k_2(x, y)=B k(x, y)
k_2(x, y)=\mathbf{B} k(x, y)
where
.. math::
B = WW^\top + kappa I..
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + kappa \mathbf{I}
:param output_dim: the number of output dimensions
:type output_dim: int

View file

@ -4,6 +4,7 @@ import coregionalise
import exponential
import finite_dimensional
import fixed
import gibbs
import independent_outputs
import linear
import Matern32

View file

@ -1,7 +1,7 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernel_mapping import Kernel
from linear_mapping import Linear
#from mlp_mapping import MLP
#from rbf_mapping import RBF
#from kernel import Kernel
from linear import Linear
from mlp import MLP
#from rbf import RBF

View file

@ -2,8 +2,8 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import Mapping
from .. import kern
from ..core.mapping import Mapping
from ..kern.kern import kern
class Kernel(Mapping):
"""
@ -42,7 +42,7 @@ class Kernel(Mapping):
def _set_params(self, x):
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
self.bias = x[self.num_data*self.output_dim:]
self.bias = x[self.num_data*self.output_dim:].copy()
def randomize(self):
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)

View file

@ -2,8 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import Mapping
from .. import kern
from ..core.mapping import Mapping
class Linear(Mapping):
"""
@ -35,7 +34,7 @@ class Linear(Mapping):
def _set_params(self, x):
self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy()
self.bias = x[self.input_dim*self.output_dim:]
self.bias = x[self.input_dim*self.output_dim:].copy()
def randomize(self):
self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1)

126
GPy/mappings/mlp.py Normal file
View file

@ -0,0 +1,126 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Mapping
class MLP(Mapping):
"""
Mapping based on a multi-layer perceptron neural network model.
.. math::
f(\mathbf{x}*) = \mathbf{W}^0\boldsymbol{\phi}(\mathbf{W}^1\mathbf{x}+\mathb{b}^1)^* + \mathbf{b}^0
where
..math::
\phi(\cdot) = \text{tanh}(\cdot)
:param X: input observations
:type X: ndarray
:param output_dim: dimension of output.
:type output_dim: int
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as manny hidden layers as the length of the list, each with the given number of hidden nodes in it.
:type hidden_dim: int or list of ints.
"""
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
if isinstance(hidden_dim, int):
hidden_dim = [hidden_dim]
self.hidden_dim = hidden_dim
self.activation = [None]*len(self.hidden_dim)
self.W = []
self._dL_dW = []
self.bias = []
self._dL_dbias = []
self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
self.bias.append(np.zeros(self.hidden_dim[0]))
self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
self.num_params = self.hidden_dim[0]*(self.input_dim+1)
for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
self.W.append(np.zeros((h0, h1)))
self._dL_dW.append(np.zeros((h0, h1)))
self.bias.append(np.zeros(h1))
self._dL_dbias.append(np.zeros(h1))
self.num_params += h1*(h0+1)
self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
self.bias.append(np.zeros(self.output_dim))
self._dL_dbias.append(np.zeros(self.output_dim))
self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
self.randomize()
def _get_param_names(self):
return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], [])
def _get_params(self):
param = np.array([])
for W, bias in zip(self.W, self.bias):
param = np.hstack((param, W.flatten(), bias))
return param
def _set_params(self, x):
start = 0
for W, bias in zip(self.W, self.bias):
end = W.shape[0]*W.shape[1]+start
W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
start = end
end = W.shape[1]+end
bias[:] = x[start:end].copy()
start = end
def randomize(self):
for W, bias in zip(self.W, self.bias):
W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)
def f(self, X):
self._f_computations(X)
return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]
def _f_computations(self, X):
W = self.W[0]
bias = self.bias[0]
self.activation[0] = np.dot(X,W) + bias
for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W)+bias
def df_dtheta(self, dL_df, X):
self._df_computations(dL_df, X)
for gW, gbias in zip(self._dL_dW, self._dL_dbias):
g = np.hstack((g, gW.flatten(), gbias))
return g
def _df_computations(self, dL_df, X):
self._f_computations(X)
a0 = self.activation[-1]
W = self.W[-1]
self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
dL_dta=(dL_df[:, None, :]*W[None, :, :]).sum(2)
self._dL_dbias[-1] = (dL_df.sum(0))
for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
self._dL_dbias[-2:0:-1],
self.W[-2:0:-1],
self.bias[-2:0:-1],
self.activation[-2::-1],
self.activation[-1:0:-1]):
ta = np.tanh(a1)
dL_da = dL_dta*(1-ta*ta)
dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
dL_dbias[:] = (dL_da.sum(0))
dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
ta = np.tanh(self.activation[0])
dL_da = dL_dta*(1-ta*ta)
W = self.W[0]
self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
self._dL_dbias[0] = (dL_da.sum(0))
self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
g = np.array([])
def df_dX(self, dL_df, X):
self._df_computations(dL_df, X)
return self._dL_dX

View file

@ -8,6 +8,7 @@ import sys, pdb
from .. import kern
from ..core import Model
from ..util.linalg import pdinv, PCA
from ..core.priors import Gaussian as Gaussian_prior
from ..core import GP
from ..likelihoods import Gaussian
from .. import util
@ -33,6 +34,7 @@ class GPLVM(GP):
kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2))
likelihood = Gaussian(Y, normalize=normalize_Y, variance=np.exp(-2.))
GP.__init__(self, X, likelihood, kernel, normalize_X=False)
self.set_prior('.*X', Gaussian_prior(0, 1))
self.ensure_default_constraints()
def initialise_latent(self, init, input_dim, Y):

View file

@ -62,3 +62,7 @@ For some reason a stub of _get_param_names(self) wasn't available in the Paramet
Is there a quick FAQ or something on how to build the documentation? I did it once, but can't remember!
Similar for the nosetests ... even ran them last week but can't remember the command!
Now added Gaussian priors to GPLVM latent variables by default. When running the GPy.examples.dimensionality_reduction.stick() example the print out from print model has the same value for the prior+likelihood as for the prior.
For the back constrained GP-LVM need priors to be on the Xs not on the model parameters (because they aren't parameters, they are constraints). Need to work out how to do this.

View file

@ -21,6 +21,12 @@ class MappingTests(unittest.TestCase):
self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose))
self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose))
def test_mlpmapping(self):
verbose = False
mapping = GPy.mappings.MLP(input_dim=2, hidden_dim=[3, 4, 8, 2], output_dim=2)
self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose))
self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose))
if __name__ == "__main__":