Added mlp mapping (with the possibility of having multiple hidden layers).

This commit is contained in:
Neil Lawrence 2013-08-29 19:26:00 +02:00
parent e6739788ea
commit 128ebbabc5
12 changed files with 187 additions and 17 deletions


@@ -1,7 +1,7 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
-from kernel_mapping import Kernel
-from linear_mapping import Linear
-#from mlp_mapping import MLP
-#from rbf_mapping import RBF
+#from kernel import Kernel
+from linear import Linear
+from mlp import MLP
+#from rbf import RBF


@@ -2,8 +2,8 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
-from ..core import Mapping
-from .. import kern
+from ..core.mapping import Mapping
+from ..kern.kern import kern

class Kernel(Mapping):
    """
@@ -42,7 +42,7 @@ class Kernel(Mapping):
    def _set_params(self, x):
        self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
-        self.bias = x[self.num_data*self.output_dim:]
+        self.bias = x[self.num_data*self.output_dim:].copy()

    def randomize(self):
        self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)


@@ -2,8 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
-from ..core import Mapping
-from .. import kern
+from ..core.mapping import Mapping

class Linear(Mapping):
    """
@@ -35,7 +34,7 @@ class Linear(Mapping):
    def _set_params(self, x):
        self.W = x[:self.input_dim * self.output_dim].reshape(self.input_dim, self.output_dim).copy()
-        self.bias = x[self.input_dim*self.output_dim:]
+        self.bias = x[self.input_dim*self.output_dim:].copy()

    def randomize(self):
        self.W = np.random.randn(self.input_dim, self.output_dim)/np.sqrt(self.input_dim + 1)
        self.bias = np.random.randn(self.output_dim)/np.sqrt(self.input_dim + 1)
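Both _set_params changes above add a .copy() when slicing the bias out of the flattened parameter vector. A minimal illustration (not part of the commit) of why that matters: NumPy basic slicing returns a view, so without the copy the mapping's bias would alias the caller's parameter array and change whenever that array is overwritten in place.

import numpy as np

x = np.zeros(3)            # stand-in for the flattened parameter vector passed to _set_params
bias_view = x[1:]          # basic slicing returns a view onto x
bias_copy = x[1:].copy()   # .copy() gives an independent array

x[1] = 5.0                 # the caller later overwrites its parameter vector in place
print(bias_view[0])        # 5.0 -- the view silently tracks x
print(bias_copy[0])        # 0.0 -- the copy is unaffected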

GPy/mappings/mlp.py (new file, 126 lines)

@@ -0,0 +1,126 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Mapping
class MLP(Mapping):
"""
Mapping based on a multi-layer perceptron neural network model.
.. math::
f(\mathbf{x}*) = \mathbf{W}^0\boldsymbol{\phi}(\mathbf{W}^1\mathbf{x}+\mathb{b}^1)^* + \mathbf{b}^0
where
..math::
\phi(\cdot) = \text{tanh}(\cdot)
:param X: input observations
:type X: ndarray
:param output_dim: dimension of output.
:type output_dim: int
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as manny hidden layers as the length of the list, each with the given number of hidden nodes in it.
:type hidden_dim: int or list of ints.
"""
    def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
        if isinstance(hidden_dim, int):
            hidden_dim = [hidden_dim]
        self.hidden_dim = hidden_dim
        self.activation = [None]*len(self.hidden_dim)
        self.W = []
        self._dL_dW = []
        self.bias = []
        self._dL_dbias = []
        self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
        self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
        self.bias.append(np.zeros(self.hidden_dim[0]))
        self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
        self.num_params = self.hidden_dim[0]*(self.input_dim+1)
        for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
            self.W.append(np.zeros((h0, h1)))
            self._dL_dW.append(np.zeros((h0, h1)))
            self.bias.append(np.zeros(h1))
            self._dL_dbias.append(np.zeros(h1))
            self.num_params += h1*(h0+1)
        self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
        self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
        self.bias.append(np.zeros(self.output_dim))
        self._dL_dbias.append(np.zeros(self.output_dim))
        self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
        self.randomize()

    def _get_param_names(self):
        return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], [])

    def _get_params(self):
        param = np.array([])
        for W, bias in zip(self.W, self.bias):
            param = np.hstack((param, W.flatten(), bias))
        return param

    def _set_params(self, x):
        start = 0
        for W, bias in zip(self.W, self.bias):
            end = W.shape[0]*W.shape[1]+start
            W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
            start = end
            end = W.shape[1]+end
            bias[:] = x[start:end].copy()
            start = end

    def randomize(self):
        for W, bias in zip(self.W, self.bias):
            W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
            bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)

    def f(self, X):
        self._f_computations(X)
        return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]

    def _f_computations(self, X):
        W = self.W[0]
        bias = self.bias[0]
        self.activation[0] = np.dot(X, W) + bias
        for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
            self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W) + bias
    def df_dtheta(self, dL_df, X):
        self._df_computations(dL_df, X)
        g = np.array([])
        for gW, gbias in zip(self._dL_dW, self._dL_dbias):
            g = np.hstack((g, gW.flatten(), gbias))
        return g
    def _df_computations(self, dL_df, X):
        self._f_computations(X)
        a0 = self.activation[-1]
        W = self.W[-1]
        self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
        dL_dta = (dL_df[:, None, :]*W[None, :, :]).sum(2)
        self._dL_dbias[-1] = (dL_df.sum(0))
        for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
                                                    self._dL_dbias[-2:0:-1],
                                                    self.W[-2:0:-1],
                                                    self.bias[-2:0:-1],
                                                    self.activation[-2::-1],
                                                    self.activation[-1:0:-1]):
            ta = np.tanh(a1)
            dL_da = dL_dta*(1-ta*ta)
            dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
            dL_dbias[:] = (dL_da.sum(0))
            dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
        ta = np.tanh(self.activation[0])
        dL_da = dL_dta*(1-ta*ta)
        W = self.W[0]
        self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
        self._dL_dbias[0] = (dL_da.sum(0))
        self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
    def df_dX(self, dL_df, X):
        self._df_computations(dL_df, X)
        return self._dL_dX
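For reference, a minimal sketch of how the new mapping could be exercised and its df_dtheta gradient checked against finite differences. This is illustrative only and not part of the commit; it assumes the class is importable as GPy.mappings.MLP (as the __init__.py change above suggests) and relies on the _get_params/_set_params packing order shown in the file.

import numpy as np
from GPy.mappings import MLP   # assumed import path, per the __init__.py change above

np.random.seed(0)
X = np.random.randn(10, 2)                                   # 10 inputs of dimension 2
mapping = MLP(input_dim=2, output_dim=3, hidden_dim=[5, 4])  # two hidden layers

Y = mapping.f(X)                       # forward pass, shape (10, 3)
dL_df = np.random.randn(*Y.shape)      # arbitrary upstream gradient dL/df

# Analytic gradient of L = sum(dL_df * f(X)) with respect to the flattened parameters.
g_analytic = mapping.df_dtheta(dL_df, X)

# Central finite differences over the same flattened parameter vector.
theta = mapping._get_params().copy()
g_numeric = np.zeros_like(theta)
eps = 1e-6
for i in range(theta.size):
    t = theta.copy()
    t[i] += eps
    mapping._set_params(t)
    L_plus = np.sum(dL_df * mapping.f(X))
    t[i] -= 2 * eps
    mapping._set_params(t)
    L_minus = np.sum(dL_df * mapping.f(X))
    g_numeric[i] = (L_plus - L_minus) / (2 * eps)
mapping._set_params(theta)             # restore the original parameters

print(np.allclose(g_analytic, g_numeric, atol=1e-5))   # expect True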