From 128ebbabc5f99365f3ac7d9490c2a271854f064f Mon Sep 17 00:00:00 2001
From: Neil Lawrence
Date: Thu, 29 Aug 2013 19:26:00 +0200
Subject: [PATCH] Added mlp mapping (with possibility of having multiple layers).
---
 GPy/core/gp.py                                |   1 -
 GPy/core/mapping.py                           |   5 +-
 GPy/examples/dimensionality_reduction.py      |   2 +-
 GPy/kern/constructors.py                      |  38 +++++-
 GPy/kern/parts/__init__.py                    |   1 +
 GPy/mappings/__init__.py                      |   8 +-
 GPy/mappings/{kernel_mapping.py => kernel.py} |   6 +-
 GPy/mappings/{linear_mapping.py => linear.py} |   5 +-
 GPy/mappings/mlp.py                           | 126 ++++++++++++++++++
 GPy/models/gplvm.py                           |   2 +
 GPy/notes.txt                                 |   4 +
 GPy/testing/mapping_tests.py                  |   6 +
 12 files changed, 187 insertions(+), 17 deletions(-)
 rename GPy/mappings/{kernel_mapping.py => kernel.py} (94%)
 rename GPy/mappings/{linear_mapping.py => linear.py} (94%)
 create mode 100644 GPy/mappings/mlp.py

diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index faa77e80..278ddc74 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -6,7 +6,6 @@ import numpy as np
 import pylab as pb
 from .. import kern
 from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
-# from ..util.plot import gpplot, Tango
 from ..likelihoods import EP
 from gp_base import GPBase

diff --git a/GPy/core/mapping.py b/GPy/core/mapping.py
index 074a30fc..02b9664a 100644
--- a/GPy/core/mapping.py
+++ b/GPy/core/mapping.py
@@ -1,11 +1,10 @@
 # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-import numpy as np
-from .. import kern
 from ..util.plot import Tango, x_frame1D, x_frame2D
+from parameterized import Parameterized
+import numpy as np
 import pylab as pb
-from GPy.core.parameterized import Parameterized

 class Mapping(Parameterized):
     """
diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py
index 09cc9abe..994e3b48 100644
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@@ -397,7 +397,7 @@ def bcgplvm_linear_stick(kernel=None):
 def bcgplvm_stick(kernel=None):
     data = GPy.util.datasets.osu_run1()
     # optimize
-    back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=10.)
+    back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.)
     mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
     m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
     m.optimize(messages=1, max_f_eval=10000)
diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py
index 03cf43c1..025867a9 100644
--- a/GPy/kern/constructors.py
+++ b/GPy/kern/constructors.py
@@ -69,6 +69,40 @@ def mlp(input_dim,variance=1., weight_variance=None,bias_variance=100.,ARD=False
     part = parts.mlp.MLP(input_dim,variance,weight_variance,bias_variance,ARD)
     return kern(input_dim, [part])

+# def gibbs(input_dim,variance=1., mapping=None):
+#     """
+#     Gibbs and MacKay non-stationary covariance function.
+
+#     .. math::
+
+#        r = \sqrt{(x_i - x_j)^\top (x_i - x_j)}
+
+#        k(x_i, x_j) = \sigma^2 Z \exp\left(-\frac{r^2}{l(x_i)^2 + l(x_j)^2}\right)
+
+#        Z = \sqrt{\frac{2 l(x_i) l(x_j)}{l(x_i)^2 + l(x_j)^2}}
+
+#     where :math:`l(x)` gives the lengthscale as a function of input location.
+#     This is the non-stationary kernel proposed by Mark Gibbs in his 1997
+#     thesis. It is similar to an RBF, but has a lengthscale that varies with
+#     input location; this is what leads to the additional term :math:`Z` in
+#     front of the exponential.
+
+#     The parameters are :math:`\sigma^2`, the process variance, and the
+#     parameters of :math:`l(x)`, which is a function that can be specified
+#     by the user; by default a multi-layer perceptron is used.
+
+#     :param input_dim: the number of input dimensions
+#     :type input_dim: int
+#     :param variance: the variance :math:`\sigma^2`
+#     :type variance: float
+#     :param mapping: the mapping that gives the lengthscale across the input space.
+#     :type mapping: GPy.core.Mapping
+#     :rtype: Kernpart object
+
+#     """
+#     part = parts.gibbs.Gibbs(input_dim,variance,mapping)
+#     return kern(input_dim, [part])
+
 def poly(input_dim,variance=1., weight_variance=None,bias_variance=1.,degree=2, ARD=False):
     """
     Construct a polynomial kernel
@@ -312,10 +346,10 @@ def coregionalise(output_dim, rank=1, W=None, kappa=None):
     Used for computing covariance functions of the form
     .. math::
-       k_2(x, y)=B k(x, y)
+       k_2(x, y)=\mathbf{B} k(x, y)

     where
     .. math::
-       B = WW^\top + kappa I..
+       \mathbf{B} = \mathbf{W}\mathbf{W}^\top + \kappa \mathbf{I}

     :param output_dim: the number of output dimensions
     :type output_dim: int
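
Review aid, not part of the patch: since the gibbs constructor above is left commented out, the following is a small numpy illustration of the covariance its docstring describes, restricted to 1-d inputs. The gibbs_cov helper and the lengthscale function l are purely illustrative stand-ins for the eventual Kernpart and mapping.

    import numpy as np

    def gibbs_cov(X, l, variance=1.0):
        # Gibbs non-stationary covariance for 1-d inputs X of shape (n, 1);
        # l returns the lengthscale at each input location.
        lx = l(X).flatten()
        l2 = lx[:, None]**2 + lx[None, :]**2   # l(x_i)^2 + l(x_j)^2
        r2 = (X - X.T)**2                      # squared distances
        Z = np.sqrt(2.*np.outer(lx, lx)/l2)    # the extra normalising term
        return variance*Z*np.exp(-r2/l2)

    X = np.linspace(-1., 1., 5)[:, None]
    K = gibbs_cov(X, lambda X: 0.5 + X**2)     # lengthscale grows away from 0
    print np.allclose(K, K.T)                      # symmetric
    print np.all(np.linalg.eigvalsh(K) > -1e-10)   # positive semi-definite
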
diff --git a/GPy/kern/parts/__init__.py b/GPy/kern/parts/__init__.py
index cf8df575..053e280f 100644
--- a/GPy/kern/parts/__init__.py
+++ b/GPy/kern/parts/__init__.py
@@ -4,6 +4,7 @@ import coregionalise
 import exponential
 import finite_dimensional
 import fixed
+import gibbs
 import independent_outputs
 import linear
 import Matern32
diff --git a/GPy/mappings/__init__.py b/GPy/mappings/__init__.py
index d1383c18..b0d90ba2 100644
--- a/GPy/mappings/__init__.py
+++ b/GPy/mappings/__init__.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from kernel_mapping import Kernel
-from linear_mapping import Linear
-#from mlp_mapping import MLP
-#from rbf_mapping import RBF
+from kernel import Kernel
+from linear import Linear
+from mlp import MLP
+#from rbf import RBF
diff --git a/GPy/mappings/kernel_mapping.py b/GPy/mappings/kernel.py
similarity index 94%
rename from GPy/mappings/kernel_mapping.py
rename to GPy/mappings/kernel.py
index 6ea4322f..a794e561 100644
--- a/GPy/mappings/kernel_mapping.py
+++ b/GPy/mappings/kernel.py
@@ -2,8 +2,8 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-from ..core import Mapping
-from .. import kern
+from ..core.mapping import Mapping
+from ..kern.kern import kern

 class Kernel(Mapping):
     """
@@ -42,7 +42,7 @@ class Kernel(Mapping):

     def _set_params(self, x):
         self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
-        self.bias = x[self.num_data*self.output_dim:]
+        self.bias = x[self.num_data*self.output_dim:].copy()

     def randomize(self):
         self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
diff --git a/GPy/mappings/linear_mapping.py b/GPy/mappings/linear.py
similarity index 94%
rename from GPy/mappings/linear_mapping.py
rename to GPy/mappings/linear.py
index 969cdd82..5f6aaa49 100644
--- a/GPy/mappings/linear_mapping.py
+++ b/GPy/mappings/linear.py
@@ -2,8 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-from ..core import Mapping
-from .. import kern
+from ..core.mapping import Mapping

 class Linear(Mapping):
     """
@@ -35,7 +34,7 @@ class Linear(Mapping):

     def _set_params(self, x):
         self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy()
-        self.bias = x[self.input_dim*self.output_dim:]
+        self.bias = x[self.input_dim*self.output_dim:].copy()

     def randomize(self):
         self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1)
         self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1)
diff --git a/GPy/mappings/mlp.py b/GPy/mappings/mlp.py
new file mode 100644
index 00000000..ce0e04b0
--- /dev/null
+++ b/GPy/mappings/mlp.py
@@ -0,0 +1,126 @@
+# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+from ..core.mapping import Mapping
+
+class MLP(Mapping):
+    """
+    Mapping based on a multi-layer perceptron neural network model.
+
+    .. math::
+
+       f(\mathbf{x}_*) = \mathbf{W}^0 \boldsymbol{\phi}(\mathbf{W}^1 \mathbf{x}_* + \mathbf{b}^1) + \mathbf{b}^0
+
+    where
+
+    .. math::
+
+       \boldsymbol{\phi}(\cdot) = \tanh(\cdot)
+
+    :param input_dim: dimension of the input.
+    :type input_dim: int
+    :param output_dim: dimension of the output.
+    :type output_dim: int
+    :param hidden_dim: dimension of the hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints, there are as many hidden layers as the length of the list, each with the given number of hidden nodes in it.
+    :type hidden_dim: int or list of ints.
+    """
+
+    def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
+        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
+        if isinstance(hidden_dim, int):
+            hidden_dim = [hidden_dim]
+        self.hidden_dim = hidden_dim
+        self.activation = [None]*len(self.hidden_dim)
+        # Weights, biases and their gradient storage, layer by layer:
+        # input -> first hidden, hidden -> hidden, last hidden -> output.
+        self.W = []
+        self._dL_dW = []
+        self.bias = []
+        self._dL_dbias = []
+        self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
+        self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
+        self.bias.append(np.zeros(self.hidden_dim[0]))
+        self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
+        self.num_params = self.hidden_dim[0]*(self.input_dim+1)
+        for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
+            self.W.append(np.zeros((h0, h1)))
+            self._dL_dW.append(np.zeros((h0, h1)))
+            self.bias.append(np.zeros(h1))
+            self._dL_dbias.append(np.zeros(h1))
+            self.num_params += h1*(h0+1)
+        self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
+        self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
+        self.bias.append(np.zeros(self.output_dim))
+        self._dL_dbias.append(np.zeros(self.output_dim))
+        self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
+        self.randomize()
+
+    def _get_param_names(self):
+        return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])]
+                    + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])]
+                    for i in range(len(self.W))], [])
+
+    def _get_params(self):
+        param = np.array([])
+        for W, bias in zip(self.W, self.bias):
+            param = np.hstack((param, W.flatten(), bias))
+        return param
+
+    def _set_params(self, x):
+        start = 0
+        for W, bias in zip(self.W, self.bias):
+            end = W.shape[0]*W.shape[1] + start
+            W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
+            start = end
+            end = W.shape[1] + end
+            bias[:] = x[start:end].copy()
+            start = end
+
+    def randomize(self):
+        for W, bias in zip(self.W, self.bias):
+            W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
+            bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)
+
+    def f(self, X):
+        self._f_computations(X)
+        return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]
+
+    def _f_computations(self, X):
+        # Forward pass, storing the pre-activation of each hidden layer.
+        W = self.W[0]
+        bias = self.bias[0]
+        self.activation[0] = np.dot(X, W) + bias
+        for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
+            self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W) + bias
+
+    def df_dtheta(self, dL_df, X):
+        self._df_computations(dL_df, X)
+        g = np.array([])
+        for gW, gbias in zip(self._dL_dW, self._dL_dbias):
+            g = np.hstack((g, gW.flatten(), gbias))
+        return g
+
+    def _df_computations(self, dL_df, X):
+        # Backward pass (back-propagation): output layer first, then walk
+        # back through the hidden layers to the inputs.
+        self._f_computations(X)
+        a0 = self.activation[-1]
+        W = self.W[-1]
+        self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
+        dL_dta = (dL_df[:, None, :]*W[None, :, :]).sum(2)
+        self._dL_dbias[-1] = dL_df.sum(0)
+        for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
+                                                    self._dL_dbias[-2:0:-1],
+                                                    self.W[-2:0:-1],
+                                                    self.bias[-2:0:-1],
+                                                    self.activation[-2::-1],
+                                                    self.activation[-1:0:-1]):
+            ta = np.tanh(a1)
+            dL_da = dL_dta*(1-ta*ta)
+            dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
+            dL_dbias[:] = dL_da.sum(0)
+            dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
+        ta = np.tanh(self.activation[0])
+        dL_da = dL_dta*(1-ta*ta)
+        W = self.W[0]
+        self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
+        self._dL_dbias[0] = dL_da.sum(0)
+        self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
+
+    def df_dX(self, dL_df, X):
+        self._df_computations(dL_df, X)
+        return self._dL_dX
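
For review, a minimal usage sketch of the new mapping (not part of the patch; it assumes only the MLP class added above):

    import numpy as np
    import GPy

    # one hidden layer of three tanh units
    mapping = GPy.mappings.MLP(input_dim=2, output_dim=2, hidden_dim=3)
    X = np.random.randn(10, 2)
    print mapping.f(X).shape  # (10, 2)

    # several hidden layers: pass a list of widths instead of an int
    deep = GPy.mappings.MLP(input_dim=2, output_dim=2, hidden_dim=[3, 4])
    print deep.num_params  # 3*(2+1) + 4*(3+1) + 2*(4+1) = 35
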
diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py
index 292b3da4..c2a7768c 100644
--- a/GPy/models/gplvm.py
+++ b/GPy/models/gplvm.py
@@ -8,6 +8,7 @@ import sys, pdb
 from .. import kern
 from ..core import Model
 from ..util.linalg import pdinv, PCA
+from ..core.priors import Gaussian as Gaussian_prior
 from ..core import GP
 from ..likelihoods import Gaussian
 from .. import util
@@ -33,6 +34,7 @@ class GPLVM(GP):
         kernel = kern.rbf(input_dim, ARD=input_dim > 1) + kern.bias(input_dim, np.exp(-2))
         likelihood = Gaussian(Y, normalize=normalize_Y, variance=np.exp(-2.))
         GP.__init__(self, X, likelihood, kernel, normalize_X=False)
+        self.set_prior('.*X', Gaussian_prior(0, 1))
         self.ensure_default_constraints()

     def initialise_latent(self, init, input_dim, Y):
diff --git a/GPy/notes.txt b/GPy/notes.txt
index 530b7036..c8c184a1 100644
--- a/GPy/notes.txt
+++ b/GPy/notes.txt
@@ -62,3 +62,7 @@ For some reason a stub of _get_param_names(self) wasn't available in the Paramet
 Is there a quick FAQ or something on how to build the documentation? I did it once, but can't remember!

 Similar for the nosetests ... even ran them last week but can't remember the command!
+
+Now added Gaussian priors to the GPLVM latent variables by default. When running the GPy.examples.dimensionality_reduction.stick() example, the output of print model shows the same value for the prior+likelihood as for the prior alone.
+
+For the back-constrained GP-LVM the priors need to be on the Xs, not on the model parameters (because the Xs aren't parameters there, they are constrained). Need to work out how to do this.
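
The mapping tests below use the Mapping_check_* gradient checkers; the same back-propagation in mlp.py can also be checked by hand with central finite differences (a sketch, not part of the patch, using only methods defined above):

    import numpy as np
    import GPy

    np.random.seed(0)
    mapping = GPy.mappings.MLP(input_dim=2, output_dim=2, hidden_dim=[3, 4])
    X = np.random.randn(10, 2)
    dL_df = np.random.randn(10, 2)  # an arbitrary upstream gradient

    # analytic gradient of L = (dL_df*f(X)).sum() w.r.t. the parameters
    g = mapping.df_dtheta(dL_df, X)

    # central finite differences over every parameter
    theta = mapping._get_params().copy()
    g_fd = np.zeros_like(theta)
    eps = 1e-6
    for i in range(theta.size):
        t = theta.copy()
        t[i] += eps
        mapping._set_params(t)
        f_plus = (dL_df*mapping.f(X)).sum()
        t[i] -= 2*eps
        mapping._set_params(t)
        f_minus = (dL_df*mapping.f(X)).sum()
        g_fd[i] = (f_plus - f_minus)/(2.*eps)
    mapping._set_params(theta)
    print np.abs(g - g_fd).max()  # should be of order 1e-9
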
diff --git a/GPy/testing/mapping_tests.py b/GPy/testing/mapping_tests.py index 578c3dac..cd28e71a 100644 --- a/GPy/testing/mapping_tests.py +++ b/GPy/testing/mapping_tests.py @@ -21,6 +21,12 @@ class MappingTests(unittest.TestCase): self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose)) self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose)) + def test_mlpmapping(self): + verbose = False + mapping = GPy.mappings.MLP(input_dim=2, hidden_dim=[3, 4, 8, 2], output_dim=2) + self.assertTrue(GPy.core.Mapping_check_df_dtheta(mapping=mapping).checkgrad(verbose=verbose)) + self.assertTrue(GPy.core.Mapping_check_df_dX(mapping=mapping).checkgrad(verbose=verbose)) + if __name__ == "__main__":
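
With the patch applied, the MLP mapping should slot into the back-constrained GP-LVM in the same way as the kernel mapping in the bcgplvm_stick example above. A hypothetical sketch, assuming GPy.models.BCGPLVM accepts any GPy.core.Mapping and a default kernel:

    import GPy

    data = GPy.util.datasets.osu_run1()
    mapping = GPy.mappings.MLP(input_dim=data['Y'].shape[1], output_dim=2,
                               hidden_dim=10)
    m = GPy.models.BCGPLVM(data['Y'], 2, mapping=mapping)
    m.optimize(messages=1, max_f_eval=10000)
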