mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-14 06:22:38 +02:00
lots of changes to mappings

commit 9976e56bea, parent 99545500b1
7 changed files with 202 additions and 238 deletions
@@ -3,128 +3,40 @@
 import numpy as np
 from ..core.mapping import Mapping
+from ..core import Param

 class MLP(Mapping):
     """
-    Mapping based on a multi-layer perceptron neural network model.
-
-    .. math::
-
-       f(\\mathbf{x}_*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}_* + \\mathbf{b}^1) + \\mathbf{b}^0
-
-    where
-
-    .. math::
-
-       \\phi(\\cdot) = \\text{tanh}(\\cdot)
-
-    :param X: input observations
-    :type X: ndarray
-    :param output_dim: dimension of output.
-    :type output_dim: int
-    :param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints, there are as many hidden layers as the length of the list, each with the given number of hidden nodes in it.
-    :type hidden_dim: int or list of ints.
+    Mapping based on a multi-layer perceptron neural network model, with a single hidden layer.
     """
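The deleted docstring's equation is a single-hidden-layer forward pass, f(x_*) = W^0 tanh(W^1 x_* + b^1) + b^0. A minimal numpy sketch of that formula, with made-up dimensions (illustrative only, not part of the commit):

import numpy as np

rng = np.random.RandomState(0)
input_dim, hidden_dim, output_dim = 2, 3, 1
W1 = rng.randn(input_dim, hidden_dim)   # first-layer weights, W^1
b1 = rng.randn(hidden_dim)              # first-layer bias, b^1
W0 = rng.randn(hidden_dim, output_dim)  # output-layer weights, W^0
b0 = rng.randn(output_dim)              # output-layer bias, b^0

x_star = rng.randn(input_dim)           # one test input, x_*
f_star = np.dot(np.tanh(np.dot(x_star, W1) + b1), W0) + b0  # shape (output_dim,)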
-    def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
-        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
-        self.name = 'mlp'
-        if isinstance(hidden_dim, int):
-            hidden_dim = [hidden_dim]
-        self.hidden_dim = hidden_dim
-        self.activation = [None]*len(self.hidden_dim)
-        self.W = []
-        self._dL_dW = []
-        self.bias = []
-        self._dL_dbias = []
-        self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
-        self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
-        self.bias.append(np.zeros(self.hidden_dim[0]))
-        self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
-        self.num_params = self.hidden_dim[0]*(self.input_dim+1)
-        for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
-            self.W.append(np.zeros((h0, h1)))
-            self._dL_dW.append(np.zeros((h0, h1)))
-            self.bias.append(np.zeros(h1))
-            self._dL_dbias.append(np.zeros(h1))
-            self.num_params += h1*(h0+1)
-        self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
-        self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
-        self.bias.append(np.zeros(self.output_dim))
-        self._dL_dbias.append(np.zeros(self.output_dim))
-        self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
-        self.randomize()
+    def __init__(self, input_dim=1, output_dim=1, hidden_dim=3, name='mlpmap'):
+        super(MLP, self).__init__(input_dim=input_dim, output_dim=output_dim, name=name)
+        self.hidden_dim = hidden_dim
+        self.W1 = Param('W1', np.random.randn(self.input_dim, self.hidden_dim))
+        self.b1 = Param('b1', np.random.randn(self.hidden_dim))
+        self.W2 = Param('W2', np.random.randn(self.hidden_dim, self.output_dim))
+        self.b2 = Param('b2', np.random.randn(self.output_dim))
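Usage of the rewritten constructor would look roughly like the sketch below. The import path is assumed from this file's package layout, and the example relies on GPy's Param behaving as an ndarray; current GPy would also typically register the parameters with self.link_parameters(...) so the optimizer sees them, which this commit does not yet do:

import numpy as np
from GPy.mappings import MLP  # path assumed, not shown in the diff

mapping = MLP(input_dim=2, output_dim=1, hidden_dim=5)
X = np.random.randn(10, 2)  # 10 two-dimensional inputs
F = mapping.f(X)            # mapped outputs, shape (10, 1)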
-    def _get_param_names(self):
-        return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], [])
-
-    def _get_params(self):
-        param = np.array([])
-        for W, bias in zip(self.W, self.bias):
-            param = np.hstack((param, W.flatten(), bias))
-        return param
-
-    def _set_params(self, x):
-        start = 0
-        for W, bias in zip(self.W, self.bias):
-            end = W.shape[0]*W.shape[1] + start
-            W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
-            start = end
-            end = W.shape[1] + end
-            bias[:] = x[start:end].copy()
-            start = end
-
-    def randomize(self):
-        for W, bias in zip(self.W, self.bias):
-            W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
-            bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)
-
     def f(self, X):
-        self._f_computations(X)
-        return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]
+        N, D = X.shape
+        self.activations = np.tanh(np.dot(X, self.W1) + self.b1)
+        self.out = np.dot(self.activations, self.W2) + self.b2
+        return self.out

-    def _f_computations(self, X):
-        W = self.W[0]
-        bias = self.bias[0]
-        self.activation[0] = np.dot(X, W) + bias
-        for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
-            self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W) + bias
+    def update_gradients(self, dL_dF, X):
+        activations = np.tanh(np.dot(X, self.W1) + self.b1)
+
+        # Evaluate second-layer gradients.
+        self.W2.gradient = np.dot(activations.T, dL_dF)
+        self.b2.gradient = np.sum(dL_dF, 0)
+
+        # Backpropagation to hidden layer.
+        delta_hid = np.dot(dL_dF, self.W2.T) * (1.0 - activations**2)
+
+        # Finally, evaluate the first-layer gradients.
+        self.W1.gradient = np.dot(X.T, delta_hid)
+        self.b1.gradient = np.sum(delta_hid, 0)

-    def df_dtheta(self, dL_df, X):
-        self._df_computations(dL_df, X)
-        g = np.array([])
-        for gW, gbias in zip(self._dL_dW, self._dL_dbias):
-            g = np.hstack((g, gW.flatten(), gbias))
-        return g
-
-    def _df_computations(self, dL_df, X):
-        self._f_computations(X)
-        a0 = self.activation[-1]
-        W = self.W[-1]
-        self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
-        dL_dta = (dL_df[:, None, :]*W[None, :, :]).sum(2)
-        self._dL_dbias[-1] = dL_df.sum(0)
-        for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
-                                                    self._dL_dbias[-2:0:-1],
-                                                    self.W[-2:0:-1],
-                                                    self.bias[-2:0:-1],
-                                                    self.activation[-2::-1],
-                                                    self.activation[-1:0:-1]):
-            ta = np.tanh(a1)
-            dL_da = dL_dta*(1 - ta*ta)
-            dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
-            dL_dbias[:] = dL_da.sum(0)
-            dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
-        ta = np.tanh(self.activation[0])
-        dL_da = dL_dta*(1 - ta*ta)
-        W = self.W[0]
-        self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
-        self._dL_dbias[0] = dL_da.sum(0)
-        self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
-
-    def df_dX(self, dL_df, X):
-        self._df_computations(dL_df, X)
-        return self._dL_dX
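A quick way to validate the backpropagation formulas in the new update_gradients is a finite-difference test. The sketch below is plain numpy, independent of GPy; the scalar loss sum(dL_dF * F) is chosen so that its gradient with respect to F is exactly the stand-in upstream gradient dL_dF:

import numpy as np

rng = np.random.RandomState(1)
N, D, H, Q = 4, 3, 5, 2
X = rng.randn(N, D)
W1, b1 = rng.randn(D, H), rng.randn(H)
W2, b2 = rng.randn(H, Q), rng.randn(Q)
dL_dF = rng.randn(N, Q)  # stand-in upstream gradient

def loss(W1_):
    F = np.dot(np.tanh(np.dot(X, W1_) + b1), W2) + b2
    return np.sum(dL_dF * F)

# Analytic first-layer gradient, exactly as in update_gradients.
activations = np.tanh(np.dot(X, W1) + b1)
delta_hid = np.dot(dL_dF, W2.T) * (1.0 - activations**2)
dL_dW1 = np.dot(X.T, delta_hid)

# Central finite difference on one entry of W1.
eps = 1e-6
Wp, Wm = W1.copy(), W1.copy()
Wp[0, 0] += eps
Wm[0, 0] -= eps
numeric = (loss(Wp) - loss(Wm)) / (2*eps)
assert np.allclose(numeric, dL_dW1[0, 0], rtol=1e-4)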