Resolve merge conflicts

Mike Croucher 2015-04-01 11:44:20 +01:00
commit e82b5fe773
31 changed files with 1729 additions and 731 deletions


@@ -17,45 +17,25 @@ class Additive(Mapping):
:type mapping1: GPy.mappings.Mapping
:param mapping2: second mapping to add together.
:type mapping2: GPy.mappings.Mapping
:param tensor: whether or not to use the tensor product of input spaces
:type tensor: bool
"""
def __init__(self, mapping1, mapping2, tensor=False):
if tensor:
input_dim = mapping1.input_dim + mapping2.input_dim
else:
input_dim = mapping1.input_dim
assert(mapping1.input_dim==mapping2.input_dim)
def __init__(self, mapping1, mapping2):
assert(mapping1.input_dim==mapping2.input_dim)
assert(mapping1.output_dim==mapping2.output_dim)
output_dim = mapping1.output_dim
input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.mapping1 = mapping1
self.mapping2 = mapping2
self.num_params = self.mapping1.num_params + self.mapping2.num_params
self.name = self.mapping1.name + '+' + self.mapping2.name
def _get_param_names(self):
return self.mapping1._get_param_names() + self.mapping2._get_param_names()
def _get_params(self):
return np.hstack((self.mapping1._get_params(), self.mapping2._get_params()))
def _set_params(self, x):
self.mapping1._set_params(x[:self.mapping1.num_params])
self.mapping2._set_params(x[self.mapping1.num_params:])
def randomize(self):
self.mapping1._randomize()
self.mapping2._randomize()
def f(self, X):
return self.mapping1.f(X) + self.mapping2.f(X)
def df_dtheta(self, dL_df, X):
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def update_gradients(self, dL_dF, X):
self.mapping1.update_gradients(dL_dF, X)
self.mapping2.update_gradients(dL_dF, X)
def df_dX(self, dL_df, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
def gradients_X(self, dL_dF, X):
return self.mapping1.gradients_X(dL_dF, X) + self.mapping2.gradients_X(dL_dF, X)
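A minimal usage sketch of the new Additive interface, assuming these classes are exposed as GPy.mappings.Additive, GPy.mappings.Linear and GPy.mappings.Identity with the constructors shown in this diff:

import numpy as np
import GPy

# two mappings over the same 1-D input and output space
m1 = GPy.mappings.Linear(input_dim=1, output_dim=1)
m2 = GPy.mappings.Identity(input_dim=1, output_dim=1)
both = GPy.mappings.Additive(m1, m2)   # f(X) = m1.f(X) + m2.f(X)
X = np.random.randn(10, 1)
assert np.allclose(both.f(X), m1.f(X) + m2.f(X))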

GPy/mappings/identity.py (new file, 26 lines)

@@ -0,0 +1,26 @@
# Copyright (c) 2015, James Hensman
from ..core.mapping import Mapping
from ..core import Param
class Identity(Mapping):
"""
A mapping that does nothing!
"""
def __init__(self, input_dim, output_dim, name='identity'):
Mapping.__init__(self, input_dim, output_dim, name)
def f(self, X):
return X
def update_gradients(self, dL_dF, X):
pass
def gradients_X(self, dL_dF, X):
return dL_dF
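A minimal sketch of the new Identity mapping in use, assuming it is importable as GPy.mappings.Identity:

import numpy as np
import GPy

ident = GPy.mappings.Identity(input_dim=2, output_dim=2)
X = np.random.randn(5, 2)
assert np.allclose(ident.f(X), X)                        # f is the identity
dL_dF = np.ones((5, 2))
assert np.allclose(ident.gradients_X(dL_dF, X), dL_dF)   # gradients pass straight through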


@@ -1,9 +1,10 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Mapping
import GPy
from ..core import Param
class Kernel(Mapping):
"""
@@ -11,50 +12,41 @@ class Kernel(Mapping):
.. math::
f(\mathbf{x}^*) = \mathbf{A}\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b}
f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})
:param X: input observations containing :math:`\mathbf{X}`
:type X: ndarray
or for multiple outputs
.. math::
f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_j, \mathbf{x})
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
:param Z: input observations containing :math:`\mathbf{Z}`
:type Z: ndarray
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
:type kernel: GPy.kern.kern
"""
def __init__(self, X, output_dim=1, kernel=None):
Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim)
if kernel is None:
kernel = GPy.kern.RBF(self.input_dim)
def __init__(self, input_dim, output_dim, Z, kernel, name='kernmap'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.kern = kernel
self.X = X
self.num_data = X.shape[0]
self.num_params = self.output_dim*(self.num_data + 1)
self.A = np.array((self.num_data, self.output_dim))
self.bias = np.array(self.output_dim)
self.randomize()
self.name = 'kernel'
def _get_param_names(self):
return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.A.flatten(), self.bias))
def _set_params(self, x):
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
self.bias = x[self.num_data*self.output_dim:].copy()
def randomize(self):
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)
self.Z = Z
self.num_bases, Zdim = Z.shape
assert Zdim == self.input_dim
self.A = GPy.core.Param('A', np.random.randn(self.num_bases, self.output_dim))
self.add_parameter(self.A)
def f(self, X):
return np.dot(self.kern.K(X, self.X),self.A) + self.bias
return np.dot(self.kern.K(X, self.Z), self.A)
def df_dtheta(self, dL_df, X):
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
self._df_dbias = (dL_df.sum(0))
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def update_gradients(self, dL_dF, X):
self.kern.update_gradients_full(np.dot(dL_dF, self.A.T), X, self.Z)
self.A.gradient = np.dot(self.kern.K(self.Z, X), dL_dF)
def df_dX(self, dL_df, X):
return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
def gradients_X(self, dL_dF, X):
return self.kern.gradients_X(np.dot(dL_dF, self.A.T), X, self.Z)
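A sketch of the rewritten Kernel mapping, f(x) = K(x, Z) A, under the assumption that the class is exposed as GPy.mappings.Kernel and that GPy.kern.RBF is available:

import numpy as np
import GPy

Z = np.linspace(-3, 3, 10)[:, None]    # 10 basis points in 1-D
kmap = GPy.mappings.Kernel(input_dim=1, output_dim=1, Z=Z, kernel=GPy.kern.RBF(1))
X = np.random.randn(20, 1)
F = kmap.f(X)                          # K(X, Z) dot A, shape (20, 1)
kmap.update_gradients(np.ones_like(F), X)   # fills kmap.A.gradient and the kernel gradients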


@@ -1,43 +1,39 @@
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core.mapping import Bijective_mapping
from ..core.mapping import Mapping
from ..core.parameterization import Param
class Linear(Bijective_mapping):
class Linear(Mapping):
"""
Mapping based on a linear model.
A Linear mapping.
.. math::
f(\mathbf{x}^*) = \mathbf{W}\mathbf{x}^* + \mathbf{b}
F(\mathbf{x}) = \mathbf{A} \mathbf{x}
:param X: input observations
:type X: ndarray
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
:type kernel: GPy.kern.kern
"""
def __init__(self, input_dim=1, output_dim=1, name='linear'):
Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.W = Param('W',np.array((self.input_dim, self.output_dim)))
self.bias = Param('bias',np.array(self.output_dim))
self.link_parameters(self.W, self.bias)
def __init__(self, input_dim, output_dim, name='linmap'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.A = GPy.core.Param('A', np.random.randn(self.input_dim, self.output_dim))
self.add_parameter(self.A)
def f(self, X):
return np.dot(X,self.W) + self.bias
return np.dot(X, self.A)
def g(self, f):
V = np.linalg.solve(np.dot(self.W.T, self.W), self.W.T)
return np.dot(f-self.bias, V)
def update_gradients(self, dL_dF, X):
self.A.gradient = np.dot( X.T, dL_dF)
def df_dtheta(self, dL_df, X):
df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
df_dbias = (dL_df.sum(0))
return np.hstack((df_dW.flatten(), df_dbias))
def dL_dX(self, partial, X):
"""The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
return (partial[:, None, :]*self.W[None, :, :]).sum(2)
def gradients_X(self, dL_dF, X):
return np.dot(dL_dF, self.A.T)
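A sketch of the new Linear mapping, F(x) = A x, assuming the public name GPy.mappings.Linear; only the constructor and methods shown in this diff are relied on:

import numpy as np
import GPy

linmap = GPy.mappings.Linear(input_dim=3, output_dim=1)
X = np.random.randn(50, 3)
F = linmap.f(X)                                 # X dot A, shape (50, 1)
linmap.update_gradients(np.ones_like(F), X)     # sets linmap.A.gradient = X.T dot dL_dF
dL_dX = linmap.gradients_X(np.ones_like(F), X)  # dL_dF dot A.T, shape (50, 3)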


@@ -3,128 +3,40 @@
import numpy as np
from ..core.mapping import Mapping
from ..core import Param
class MLP(Mapping):
"""
Mapping based on a multi-layer perceptron neural network model.
.. math::
f(\\mathbf{x}^*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}^*+\\mathbf{b}^1) + \\mathbf{b}^0
where
.. math::
\\phi(\\cdot) = \\text{tanh}(\\cdot)
:param X: input observations
:type X: ndarray
:param output_dim: dimension of output.
:type output_dim: int
:param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints, there are as many hidden layers as the length of the list, each with the given number of hidden nodes in it.
:type hidden_dim: int or list of ints.
Mapping based on a multi-layer perceptron neural network model, with a single hidden layer
"""
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.name = 'mlp'
if isinstance(hidden_dim, int):
hidden_dim = [hidden_dim]
def __init__(self, input_dim=1, output_dim=1, hidden_dim=3, name='mlpmap'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.hidden_dim = hidden_dim
self.activation = [None]*len(self.hidden_dim)
self.W = []
self._dL_dW = []
self.bias = []
self._dL_dbias = []
self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
self.bias.append(np.zeros(self.hidden_dim[0]))
self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
self.num_params = self.hidden_dim[0]*(self.input_dim+1)
for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
self.W.append(np.zeros((h0, h1)))
self._dL_dW.append(np.zeros((h0, h1)))
self.bias.append(np.zeros(h1))
self._dL_dbias.append(np.zeros(h1))
self.num_params += h1*(h0+1)
self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
self.bias.append(np.zeros(self.output_dim))
self._dL_dbias.append(np.zeros(self.output_dim))
self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
self.randomize()
self.W1 = Param('W1', np.random.randn(self.input_dim, self.hidden_dim))
self.b1 = Param('b1', np.random.randn(self.hidden_dim))
self.W2 = Param('W2', np.random.randn(self.hidden_dim, self.output_dim))
self.b2 = Param('b2', np.random.randn(self.output_dim))
def _get_param_names(self):
return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], [])
def _get_params(self):
param = np.array([])
for W, bias in zip(self.W, self.bias):
param = np.hstack((param, W.flatten(), bias))
return param
def _set_params(self, x):
start = 0
for W, bias in zip(self.W, self.bias):
end = W.shape[0]*W.shape[1]+start
W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
start = end
end = W.shape[1]+end
bias[:] = x[start:end].copy()
start = end
def randomize(self):
for W, bias in zip(self.W, self.bias):
W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)
def f(self, X):
self._f_computations(X)
return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]
activations = np.tanh(np.dot(X, self.W1) + self.b1)
return np.dot(activations, self.W2) + self.b2
def _f_computations(self, X):
W = self.W[0]
bias = self.bias[0]
self.activation[0] = np.dot(X,W) + bias
for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W)+bias
def update_gradients(self, dL_dF, X):
activations = np.tanh(np.dot(X,self.W1) + self.b1)
def df_dtheta(self, dL_df, X):
self._df_computations(dL_df, X)
g = np.array([])
for gW, gbias in zip(self._dL_dW, self._dL_dbias):
g = np.hstack((g, gW.flatten(), gbias))
return g
def _df_computations(self, dL_df, X):
self._f_computations(X)
a0 = self.activation[-1]
W = self.W[-1]
self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
dL_dta=(dL_df[:, None, :]*W[None, :, :]).sum(2)
self._dL_dbias[-1] = (dL_df.sum(0))
for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
self._dL_dbias[-2:0:-1],
self.W[-2:0:-1],
self.bias[-2:0:-1],
self.activation[-2::-1],
self.activation[-1:0:-1]):
ta = np.tanh(a1)
dL_da = dL_dta*(1-ta*ta)
dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
dL_dbias[:] = (dL_da.sum(0))
dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
ta = np.tanh(self.activation[0])
dL_da = dL_dta*(1-ta*ta)
W = self.W[0]
self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
self._dL_dbias[0] = (dL_da.sum(0))
self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
#Evaluate second-layer gradients.
self.W2.gradient = np.dot(activations.T, dL_dF)
self.b2.gradient = np.sum(dL_dF, 0)
# Backpropagation to hidden layer.
delta_hid = np.dot(dL_dF, self.W2.T) * (1.0 - activations**2)
# Finally, evaluate the first-layer gradients.
self.W1.gradient = np.dot(X.T, delta_hid)
self.b1.gradient = np.sum(delta_hid, 0)
def df_dX(self, dL_df, X):
self._df_computations(dL_df, X)
return self._dL_dX
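A sketch of the rewritten single-hidden-layer MLP mapping, assuming GPy.mappings.MLP exposes the class above:

import numpy as np
import GPy

mlp = GPy.mappings.MLP(input_dim=2, output_dim=1, hidden_dim=5)
X = np.random.randn(30, 2)
F = mlp.f(X)                               # tanh(X W1 + b1) W2 + b2, shape (30, 1)
mlp.update_gradients(np.ones_like(F), X)   # backpropagates into W1, b1, W2, b2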


@@ -0,0 +1,94 @@
from GPy.core.mapping import Mapping
from GPy.core import Param
import numpy as np
class PiecewiseLinear(Mapping):
"""
A piecewise-linear mapping.
The parameters of this mapping are the positions and values of the function where it is broken (self.breaks, self.values).
Outside the range of the breaks, the function is assumed to have gradient 1
"""
def __init__(self, input_dim, output_dim, values, breaks, name='piecewise_linear'):
assert input_dim==1
assert output_dim==1
Mapping.__init__(self, input_dim, output_dim, name)
values, breaks = np.array(values).flatten(), np.array(breaks).flatten()
assert values.size == breaks.size
self.values = Param('values', values)
self.breaks = Param('breaks', breaks)
self.link_parameter(self.values)
self.link_parameter(self.breaks)
def parameters_changed(self):
self.order = np.argsort(self.breaks)*1
self.reverse_order = np.zeros_like(self.order)
self.reverse_order[self.order] = np.arange(self.order.size)
self.sorted_breaks = self.breaks[self.order]
self.sorted_values = self.values[self.order]
self.grads = np.diff(self.sorted_values)/np.diff(self.sorted_breaks)
def f(self, X):
x = X.flatten()
y = x.copy()
# first adjust the points below the first break
y[x<self.sorted_breaks[0]] = x[x<self.sorted_breaks[0]] + self.sorted_values[0] - self.sorted_breaks[0]
# now all the points past the last break
y[x>self.sorted_breaks[-1]] = x[x>self.sorted_breaks[-1]] + self.sorted_values[-1] - self.sorted_breaks[-1]
# loop through the pairs of adjacent break points
for low, up, g, v in zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
i = np.logical_and(x>low, x<up)
y[i] = v + (x[i]-low)*g
return y.reshape(-1,1)
def update_gradients(self, dL_dF, X):
x = X.flatten()
dL_dF = dL_dF.flatten()
dL_db = np.zeros(self.sorted_breaks.size)
dL_dv = np.zeros(self.sorted_values.size)
#loop across each interval, computing the gradient for each of the 4 parameters that define it
for i, (low, up, g, v) in enumerate(zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1])):
index = np.logical_and(x>low, x<up)
xx = x[index]
grad = dL_dF[index]
span = up-low
dL_dv[i] += np.sum(grad*( (low - xx)/span + 1))
dL_dv[i+1] += np.sum(grad*(xx-low)/span)
dL_db[i] += np.sum(grad*g*(xx-up)/span)
dL_db[i+1] += np.sum(grad*g*(low-xx)/span)
#now the end parts
dL_db[0] -= np.sum(dL_dF[x<self.sorted_breaks[0]])
dL_db[-1] -= np.sum(dL_dF[x>self.sorted_breaks[-1]])
dL_dv[0] += np.sum(dL_dF[x<self.sorted_breaks[0]])
dL_dv[-1] += np.sum(dL_dF[x>self.sorted_breaks[-1]])
#now put the gradients back in the correct order!
self.breaks.gradient = dL_db[self.reverse_order]
self.values.gradient = dL_dv[self.reverse_order]
def gradients_X(self, dL_dF, X):
x = X.flatten()
dL_dF = dL_dF.flatten()
# outside the range of the breakpoints, the function is just offset by a constant, so the partial derivative is 1
dL_dX = dL_dF.copy()
# inside the breakpoints, the partial derivative is the gradient of the relevant piece (self.grads)
for low, up, g, v in zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
i = np.logical_and(x>low, x<up)
dL_dX[i] = dL_dF[i]*g
return dL_dX.reshape(-1,1)
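A sketch of the new PiecewiseLinear warp on a 1-D input, assuming the class is exported as GPy.mappings.PiecewiseLinear:

import numpy as np
import GPy

breaks = [-1.0, 0.0, 1.0]   # positions where the function bends
values = [-2.0, 0.0, 0.5]   # function values at those positions
pw = GPy.mappings.PiecewiseLinear(input_dim=1, output_dim=1, values=values, breaks=breaks)
X = np.random.uniform(-2, 2, (9, 1))
F = pw.f(X)                 # linear pieces between the breaks, slope 1 outside them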