Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Alan Saul · 2015-03-27 14:17:13 +00:00
commit dd6219fb7e
22 changed files with 352 additions and 369 deletions

=====

@@ -89,7 +89,7 @@ class GP(Model):
         self.link_parameter(self.kern)
         self.link_parameter(self.likelihood)

-    def set_XY(self, X=None, Y=None):
+    def set_XY(self, X=None, Y=None, trigger_update=True):
         """
         Set the input / output data of the model
         This is useful if we wish to change our existing data but maintain the same model
@@ -99,7 +99,7 @@ class GP(Model):
         :param Y: output observations
         :type Y: np.ndarray
         """
-        self.update_model(False)
+        if trigger_update: self.update_model(False)
         if Y is not None:
             if self.normalizer is not None:
                 self.normalizer.scale_by(Y)
@@ -123,26 +123,26 @@ class GP(Model):
                 self.link_parameters(self.X)
             else:
                 self.X = ObsAr(X)
-        self.update_model(True)
-        self._trigger_params_changed()
+        if trigger_update: self.update_model(True)
+        if trigger_update: self._trigger_params_changed()

-    def set_X(self, X):
+    def set_X(self, X, trigger_update=True):
         """
         Set the input data of the model
         :param X: input observations
         :type X: np.ndarray
         """
-        self.set_XY(X=X)
+        self.set_XY(X=X, trigger_update=trigger_update)

-    def set_Y(self, Y):
+    def set_Y(self, Y, trigger_update=True):
         """
         Set the output data of the model
         :param Y: output observations
         :type Y: np.ndarray
         """
-        self.set_XY(Y=Y)
+        self.set_XY(Y=Y, trigger_update=trigger_update)

     def parameters_changed(self):
         """

=====

@@ -1,4 +1,5 @@
 # Copyright (c) 2013,2014, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2015, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import sys
@@ -7,7 +8,7 @@ import numpy as np

 class Mapping(Parameterized):
     """
-    Base model for shared behavior between models that can act like a mapping.
+    Base model for shared mapping behaviours
     """
     def __init__(self, input_dim, output_dim, name='mapping'):
@@ -18,49 +19,12 @@ class Mapping(Parameterized):
     def f(self, X):
         raise NotImplementedError

-    def df_dX(self, dL_df, X):
-        """Evaluate derivatives of mapping outputs with respect to inputs.
-
-        :param dL_df: gradient of the objective with respect to the function.
-        :type dL_df: ndarray (num_data x output_dim)
-        :param X: the input locations where derivatives are to be evaluated.
-        :type X: ndarray (num_data x input_dim)
-        :returns: matrix containing gradients of the function with respect to the inputs.
-        """
+    def gradients_X(self, dL_dF, X):
         raise NotImplementedError

-    def df_dtheta(self, dL_df, X):
-        """The gradient of the outputs of the mapping with respect to each of the parameters.
-
-        :param dL_df: gradient of the objective with respect to the function.
-        :type dL_df: ndarray (num_data x output_dim)
-        :param X: input locations where the function is evaluated.
-        :type X: ndarray (num_data x input_dim)
-        :returns: Matrix containing gradients with respect to parameters of each output for each input data.
-        :rtype: ndarray (num_params length)
-        """
+    def update_gradients(self, dL_dF, X):
         raise NotImplementedError

-    def plot(self, *args):
-        """
-        Plots the mapping associated with the model.
-          - In one dimension, the function is plotted.
-          - In two dimensions, a contour-plot shows the function
-          - In higher dimensions, we've not implemented this yet !TODO!
-
-        Can plot only part of the data and part of the posterior functions
-        using which_data and which_functions
-
-        This is a convenience function: arguments are passed to
-        GPy.plotting.matplot_dep.models_plots.plot_mapping
-        """
-        if "matplotlib" in sys.modules:
-            from ..plotting.matplot_dep import models_plots
-            mapping_plots.plot_mapping(self, *args)
-        else:
-            raise NameError, "matplotlib package has not been imported."

 class Bijective_mapping(Mapping):
     """
@@ -74,72 +38,4 @@ class Bijective_mapping(Mapping):
         """Inverse mapping from output domain of the function to the inputs."""
         raise NotImplementedError

-from model import Model
-
-class Mapping_check_model(Model):
-    """
-    This is a dummy model class used as a base class for checking that the
-    gradients of a given mapping are implemented correctly. It enables
-    checkgradient() to be called independently on each mapping.
-    """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        num_samples = 20
-        if mapping==None:
-            mapping = GPy.mapping.linear(1, 1)
-        if X==None:
-            X = np.random.randn(num_samples, mapping.input_dim)
-        if dL_df==None:
-            dL_df = np.ones((num_samples, mapping.output_dim))
-        self.mapping = mapping
-        self.X = X
-        self.dL_df = dL_df
-        self.num_params = self.mapping.num_params
-        Model.__init__(self)
-
-    def _get_params(self):
-        return self.mapping._get_params()
-
-    def _get_param_names(self):
-        return self.mapping._get_param_names()
-
-    def _set_params(self, x):
-        self.mapping._set_params(x)
-
-    def log_likelihood(self):
-        return (self.dL_df*self.mapping.f(self.X)).sum()
-
-    def _log_likelihood_gradients(self):
-        raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class."
-
-class Mapping_check_df_dtheta(Mapping_check_model):
-    """This class allows gradient checks for the gradient of a mapping with respect to parameters. """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        Mapping_check_model.__init__(self, mapping=mapping, dL_df=dL_df, X=X)
-
-    def _log_likelihood_gradients(self):
-        return self.mapping.df_dtheta(self.dL_df, self.X)
-
-class Mapping_check_df_dX(Mapping_check_model):
-    """This class allows gradient checks for the gradient of a mapping with respect to X. """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        Mapping_check_model.__init__(self, mapping=mapping, dL_df=dL_df, X=X)
-        if dL_df==None:
-            dL_df = np.ones((self.X.shape[0], self.mapping.output_dim))
-        self.num_params = self.X.shape[0]*self.mapping.input_dim
-
-    def _log_likelihood_gradients(self):
-        return self.mapping.df_dX(self.dL_df, self.X).flatten()
-
-    def _get_param_names(self):
-        return ['X_'+str(i)+','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
-
-    def _get_params(self):
-        return self.X.flatten()
-
-    def _set_params(self, x):
-        self.X = x.reshape(self.X.shape)
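With the plotting and checkgrad helper classes stripped out, a mapping now only needs to supply f, update_gradients and gradients_X. A hypothetical elementwise-square mapping, as a sketch of the new interface (the Square class is not part of this commit; it assumes input_dim == output_dim):

    import numpy as np
    from GPy.core.mapping import Mapping
    from GPy.core import Param

    class Square(Mapping):
        """Hypothetical mapping f(X) = a * X**2 with one scale parameter."""
        def __init__(self, input_dim, output_dim, name='square'):
            Mapping.__init__(self, input_dim, output_dim, name=name)
            self.a = Param('a', np.ones(1))
            self.link_parameter(self.a)

        def f(self, X):
            return self.a * X**2

        def update_gradients(self, dL_dF, X):
            # chain rule: dL/da = sum(dL_dF * df/da)
            self.a.gradient = np.sum(dL_dF * X**2)

        def gradients_X(self, dL_dF, X):
            return dL_dF * 2. * self.a * X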

=====

@@ -213,14 +213,14 @@ class Model(Parameterized):
             self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10)
             return obj_f, self.obj_grads

-    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, **kwargs):
+    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
         """
         Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

         kwargs are passed to the optimizer. They can be:

-        :param max_f_eval: maximum number of function evaluations
-        :type max_f_eval: int
+        :param max_iters: maximum number of function evaluations
+        :type max_iters: int
         :param messages: True: display messages during optimisation, "ipython_notebook":
         :type messages: bool or string
         :param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
@@ -402,6 +402,7 @@ class Model(Parameterized):
         model_details = [['<b>Model</b>', self.name + '<br>'],
                          ['<b>Log-likelihood</b>', '{}<br>'.format(float(self.log_likelihood()))],
                          ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)],
+                         ["<b>Number of Optimization Parameters</b>", '{}<br>'.format(self._size_transformed())],
                          ["<b>Updates</b>", '{}<br>'.format(self._update_on)],
                          ]
         from operator import itemgetter
@@ -419,6 +420,7 @@ class Model(Parameterized):
         model_details = [['Name', self.name],
                         ['Log-likelihood', '{}'.format(float(self.log_likelihood()))],
                         ["Number of Parameters", '{}'.format(self.size)],
+                        ["Number of Optimization Parameters", '{}'.format(self._size_transformed())],
                         ["Updates", '{}'.format(self._update_on)],
                         ]
         from operator import itemgetter
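The clear_after_finish flag is forwarded to the notebook progress display (see the VerboseOptimization changes below) and removes the widget once optimisation completes. A hedged usage sketch:

    import numpy as np
    import GPy

    X = np.linspace(0, 10, 100)[:, None]
    Y = np.sin(X) + 0.1 * np.random.randn(100, 1)
    m = GPy.models.GPRegression(X, Y)

    # In an IPython notebook this shows a progress widget and closes it on completion.
    m.optimize(messages=True, max_iters=500, clear_after_finish=True)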

=====

@@ -947,7 +947,7 @@ class Parameterizable(OptimizationHandlable):
             self._add_parameter_name(param, ignore_added_names)
         # and makes sure to not delete programmatically added parameters
         for other in self.parameters[::-1]:
-            if other is not param and other.name.startswith(param.name):
+            if other is not param and other.name == param.name:
                 warn_and_retry(param, _name_digit.match(other.name))
                 return
         if pname not in dir(self):

=====

@@ -10,11 +10,6 @@ from parameterization.variational import VariationalPosterior, NormalPosterior
 from ..util.linalg import mdot
 import logging

-from GPy.inference.latent_function_inference.posterior import Posterior
-from GPy.inference.optimization.stochastics import SparseGPStochastics,\
-    SparseGPMissing
-#no stochastics.py file added! from GPy.inference.optimization.stochastics import SparseGPStochastics,\
-#SparseGPMissing

 logger = logging.getLogger("sparse gp")

 class SparseGP(GP):
@@ -24,6 +19,10 @@ class SparseGP(GP):
     This model allows (approximate) inference using variational DTC or FITC
     (Gaussian likelihoods) as well as non-conjugate sparse methods based on
     these.
+
+    This is not for missing data, as the implementation for missing data involves
+    some inefficient optimization routine decisions.
+    See the missing-data SparseGP implementation in :py:class:`~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch`.

     :param X: inputs
     :type X: np.ndarray (num_data x input_dim)
@@ -62,6 +61,14 @@ class SparseGP(GP):
     def has_uncertain_inputs(self):
         return isinstance(self.X, VariationalPosterior)

+    def set_Z(self, Z, trigger_update=True):
+        if trigger_update: self.update_model(False)
+        self.unlink_parameter(self.Z)
+        self.Z = Param('inducing inputs', Z)
+        self.link_parameter(self.Z, index=0)
+        if trigger_update: self.update_model(True)
+        if trigger_update: self._trigger_params_changed()
+
     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
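A sketch of how set_Z combines with the trigger_update flags above, e.g. to move the data and the inducing points together with a single recompute (model construction is illustrative):

    import numpy as np
    import GPy

    X = np.random.rand(100, 1)
    Y = np.sin(X) + 0.05 * np.random.randn(100, 1)
    m = GPy.models.SparseGPRegression(X, Y, num_inducing=10)

    # Replace the data and the inducing inputs, deferring inference to the last call.
    m.set_XY(np.random.rand(100, 1), np.random.randn(100, 1), trigger_update=False)
    m.set_Z(np.random.rand(10, 1), trigger_update=True)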
@@ -111,7 +118,7 @@ class SparseGP(GP):
         For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
         return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).

-        This is for both with and without missing data.
+        This is for both with and without missing data. For missing data, see the SparseGP implementation in :py:class:`~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch`.
         """
         if kern is None: kern = self.kern

=====

@@ -11,7 +11,7 @@ def exponents(fnow, current_grad):
     return np.sign(exps) * np.log10(exps).astype(int)

 class VerboseOptimization(object):
-    def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True):
+    def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True, clear_after_finish=False):
         self.verbose = verbose
         if self.verbose:
             self.model = model
@@ -22,48 +22,52 @@ class VerboseOptimization(object):
             self.opt_name = opt.opt_name

             self.model.add_observer(self, self.print_status)
             self.status = 'running'
+            self.clear = clear_after_finish

             self.update()

             try:
                 from IPython.display import display
-                from IPython.html.widgets import FloatProgressWidget, HTMLWidget, ContainerWidget
-                self.text = HTMLWidget()
-                self.progress = FloatProgressWidget()
-                self.model_show = HTMLWidget()
+                from IPython.html.widgets import IntProgress, HTML, Box, VBox, HBox, FlexBox
+                self.text = HTML(width='100%')
+                self.progress = IntProgress(min=0, max=maxiters)
+                #self.progresstext = Text(width='100%', disabled=True, value='0/{}'.format(maxiters))
+                self.model_show = HTML()
                 self.ipython_notebook = ipython_notebook
             except:
                 # Not in Ipython notebook
                 self.ipython_notebook = False

             if self.ipython_notebook:
-                self.text.set_css('width', '100%')
-                #self.progress.set_css('width', '100%')
-
-                left_col = ContainerWidget(children = [self.progress, self.text])
-                right_col = ContainerWidget(children = [self.model_show])
-                hor_align = ContainerWidget(children = [left_col, right_col])
-
-                display(hor_align)
-
-                left_col.set_css({
-                    'padding': '2px',
-                    'width': "100%",
-                })
-
-                right_col.set_css({
-                    'padding': '2px',
-                })
-
-                hor_align.set_css({
-                    'width': "100%",
-                })
-
-                hor_align.remove_class('vbox')
-                hor_align.add_class('hbox')
-                left_col.add_class("box-flex1")
-                right_col.add_class('box-flex0')
+                left_col = VBox(children=[self.progress, self.text], padding=2, width='40%')
+                right_col = Box(children=[self.model_show], padding=2, width='60%')
+                self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
+
+                display(self.hor_align)
+
+                try:
+                    self.text.set_css('width', '100%')
+                    left_col.set_css({
+                        'padding': '2px',
+                        'width': "100%",
+                    })
+                    right_col.set_css({
+                        'padding': '2px',
+                    })
+                    self.hor_align.set_css({
+                        'width': "100%",
+                    })
+                    self.hor_align.remove_class('vbox')
+                    self.hor_align.add_class('hbox')
+                    left_col.add_class("box-flex1")
+                    right_col.add_class('box-flex0')
+                except:
+                    pass

                 #self.text.add_class('box-flex2')
                 #self.progress.add_class('box-flex1')
@@ -102,7 +106,8 @@ class VerboseOptimization(object):
                 html_body += "<td class='tg-right'>{}</td>".format(val)
                 html_body += "</tr>"
             self.text.value = html_begin + html_body + html_end
-            self.progress.value = 100*(self.iteration+1)/self.maxiters
+            self.progress.value = (self.iteration+1)
+            #self.progresstext.value = '0/{}'.format((self.iteration+1))
             self.model_show.value = self.model._repr_html_()
         else:
             n_exps = exponents(self.fnow, self.current_gradient)
@@ -146,5 +151,7 @@ class VerboseOptimization(object):
         if not self.ipython_notebook:
             print ''
             print 'Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)
-            print 'Optimization status: {0:.5g}'.format(self.status)
+            print 'Optimization status: {0:s}'.format(self.status)
             print
+        elif self.clear:
+            self.hor_align.close()
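For reference, the rewritten layout targets the IPython 2/3-era notebook widgets, and the progress bar now counts iterations directly instead of rescaling to 0-100. A standalone sketch of the same pattern (assumes a notebook where IPython.html.widgets is importable):

    from IPython.display import display
    from IPython.html.widgets import IntProgress

    maxiters = 200
    bar = IntProgress(min=0, max=maxiters)  # value is in iterations, no percentage arithmetic
    display(bar)
    for i in range(maxiters):
        bar.value = i + 1
    bar.close()  # the same call clear_after_finish uses to remove the widget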

=====

@@ -40,8 +40,11 @@ class EP(LatentFunctionInference):
         K = kern.K(X)

         if self._ep_approximation is None:
+            #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
             mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
         else:
+            #if we've already run EP, just use the existing approximation stored in self._ep_approximation
             mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation

         Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))
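For context, the cached factors are the EP site parameters: each likelihood term is replaced by a Gaussian site, so the posterior is approximated as

.. math::

    p(\mathbf{f}|\mathbf{y}) \approx q(\mathbf{f}) \propto N(\mathbf{f}|\mathbf{0}, K) \prod_i N(\tilde{\mu}_i|f_i, 1/\tilde{\tau}_i),

which is why the downstream algebra inverts :math:`K + \mathrm{diag}(1/\tilde{\tau})` via pdinv, exactly as for a Gaussian likelihood with pseudo-observations :math:`\tilde{\mu}`.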

=====

@@ -47,6 +47,8 @@ class SVGP(LatentFunctionInference):
         #rescale the F term if working on a batch
         F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
+        if dF_dthetaL is not None:
+            dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale

         #derivatives of expected likelihood
         Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_dv is diagonal
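The added lines give the likelihood-parameter gradients the same minibatch correction already applied to F. With batch_scale the ratio of the full data size to the batch size (the usual stochastic variational inference scaling), sums over the data are estimated unbiasedly from the batch:

.. math::

    \sum_{n=1}^{N} F_n \approx \frac{N}{B} \sum_{n \in \mathcal{B}} F_n.

The per-parameter gradients arrive with shape (num_params, N, D), so the two .sum(1) calls collapse the data and output axes before rescaling.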

=====

@@ -180,9 +180,12 @@ class Add(CombinationKernel):
     def input_sensitivity(self, summarize=True):
         if summarize:
-            return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts])
+            i_s = np.zeros((self.input_dim))
+            for k in self.parts:
+                i_s[k.active_dims] += k.input_sensitivity(summarize)
+            return i_s
         else:
             i_s = np.zeros((len(self.parts), self.input_dim))
             from operator import setitem
-            [setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
+            [setitem(i_s, (i, k.active_dims), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
             return i_s
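The fix matters when the summed kernels act on different subsets of the input dimensions: each part's sensitivity is now accumulated onto the dimensions it actually uses instead of being added elementwise. A small sketch (kernel choices illustrative):

    import numpy as np
    import GPy

    # One RBF on dimension 0, another on dimensions 1 and 2 of a 3d input space.
    k = GPy.kern.RBF(1, active_dims=[0]) + GPy.kern.RBF(2, active_dims=[1, 2])
    print(k.input_sensitivity(summarize=True))   # shape (3,): per-dimension totals
    print(k.input_sensitivity(summarize=False))  # shape (2, 3): one row per part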

=====

@@ -77,7 +77,7 @@ class Bernoulli(Likelihood):
         return Z_hat, mu_hat, sigma2_hat

-    def variational_expectations(self, Y, m, v, gh_points=None):
+    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
         if isinstance(self.gp_link, link_functions.Probit):
             if gh_points is None:

=====

@@ -305,18 +305,17 @@ class Gaussian(Likelihood):
         Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
         return Ysim.reshape(orig_shape)

-    def log_predictive_density(self, y_test, mu_star, var_star):
+    def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
         """
         assumes independence
         """
         v = var_star + self.variance
         return -0.5*np.log(2*np.pi) - 0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v

-    def variational_expectations(self, Y, m, v, gh_points=None):
+    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
         lik_var = float(self.variance)
         F = -0.5*np.log(2*np.pi) - 0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
         dF_dmu = (Y - m)/lik_var
         dF_dv = np.ones_like(v)*(-0.5/lik_var)
-        dF_dlik_var = np.sum(-0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2))
-        dF_dtheta = [dF_dlik_var]
-        return F, dF_dmu, dF_dv, dF_dtheta
+        dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
+        return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
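The returned F is the closed-form Gaussian expected log-likelihood under :math:`q(f) = N(m, v)`:

.. math::

    \mathbb{E}_{N(f|m,v)}\left[\log N(y|f,\sigma^2)\right] = -\tfrac{1}{2}\log(2\pi\sigma^2) - \frac{(y-m)^2 + v}{2\sigma^2},

and :math:`(y-m)^2 + v = y^2 + m^2 + v - 2my` is the expansion used in the code. The reshaped dF_dtheta now keeps a per-datum gradient of shape (num_params, N, D) rather than a pre-summed scalar, which is what lets the SVGP inference above rescale it for minibatches before summing.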

=====

@@ -177,7 +177,12 @@ class Likelihood(Parameterized):
         if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
             stop

-        dF_dtheta = None # Not yet implemented
+        if self.size:
+            dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
+            dF_dtheta = np.dot(dF_dtheta, gh_w)
+            dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
+        else:
+            dF_dtheta = None # Not yet implemented

         return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta
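The quadrature identity behind this method: for :math:`q(f) = N(m, v)` with Gauss-Hermite nodes and weights :math:`(x_k, w_k)`,

.. math::

    \mathbb{E}_{N(f|m,v)}[g(f)] \approx \frac{1}{\sqrt{\pi}} \sum_k w_k \, g(\sqrt{2v}\, x_k + m)

(up to where the :math:`1/\sqrt{\pi}` normalisation is folded into gh_w). The new branch applies the same weighted sum, the np.dot with gh_w, to the per-parameter log-density gradients whenever the likelihood actually has parameters (self.size > 0).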

=====

@@ -35,8 +35,8 @@ class StudentT(Likelihood):
         self.log_concave = False

-    def parameters_changed(self):
-        self.variance = (self.v / float(self.v - 2)) * self.sigma2
+    #def parameters_changed(self):
+        #self.variance = (self.v / float(self.v - 2)) * self.sigma2

     def update_gradients(self, grads):
         """
@@ -180,7 +180,8 @@ class StudentT(Likelihood):
         :rtype: float
         """
         e = y - inv_link_f
-        dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
+        e2 = np.square(e)
+        dlogpdf_dvar = self.v*(e2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e2))
         return dlogpdf_dvar

     def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
@@ -226,7 +227,7 @@ class StudentT(Likelihood):
     def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
         dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
         dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
-        return np.hstack((dlogpdf_dvar, dlogpdf_dv))
+        return np.array((dlogpdf_dvar, dlogpdf_dv))

     def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
         dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)

=====

@@ -17,45 +17,25 @@ class Additive(Mapping):
     :type mapping1: GPy.mappings.Mapping
     :param mapping2: second mapping to add together.
     :type mapping2: GPy.mappings.Mapping
-    :param tensor: whether or not to use the tensor product of input spaces
-    :type tensor: bool
     """

-    def __init__(self, mapping1, mapping2, tensor=False):
-        if tensor:
-            input_dim = mapping1.input_dim + mapping2.input_dim
-        else:
-            input_dim = mapping1.input_dim
-            assert(mapping1.input_dim==mapping2.input_dim)
+    def __init__(self, mapping1, mapping2):
+        assert(mapping1.input_dim==mapping2.input_dim)
         assert(mapping1.output_dim==mapping2.output_dim)
-        output_dim = mapping1.output_dim
+        input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
         Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
         self.mapping1 = mapping1
         self.mapping2 = mapping2
         self.num_params = self.mapping1.num_params + self.mapping2.num_params
         self.name = self.mapping1.name + '+' + self.mapping2.name

-    def _get_param_names(self):
-        return self.mapping1._get_param_names + self.mapping2._get_param_names
-
-    def _get_params(self):
-        return np.hstack((self.mapping1._get_params(), self.mapping2._get_params()))
-
-    def _set_params(self, x):
-        self.mapping1._set_params(x[:self.mapping1.num_params])
-        self.mapping2._set_params(x[self.mapping1.num_params:])
-
-    def randomize(self):
-        self.mapping1._randomize()
-        self.mapping2._randomize()

     def f(self, X):
         return self.mapping1.f(X) + self.mapping2.f(X)

-    def df_dtheta(self, dL_df, X):
-        self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
-        self._df_dbias = (dL_df.sum(0))
-        return np.hstack((self._df_dA.flatten(), self._df_dbias))
+    def update_gradients(self, dL_dF, X):
+        self.mapping1.update_gradients(dL_dF, X)
+        self.mapping2.update_gradients(dL_dF, X)

-    def df_dX(self, dL_df, X):
-        return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
+    def gradients_X(self, dL_dF, X):
+        return self.mapping1.gradients_X(dL_dF, X) + self.mapping2.gradients_X(dL_dF, X)

=====
GPy/mappings/identity.py (new file, 26 lines)

@@ -0,0 +1,26 @@
+# Copyright (c) 2015, James Hensman
+
+from ..core.mapping import Mapping
+from ..core import Param
+
+class Identity(Mapping):
+    """
+    A mapping that does nothing!
+    """
+    def __init__(self, input_dim, output_dim, name='identity'):
+        Mapping.__init__(self, input_dim, output_dim, name)
+
+    def f(self, X):
+        return X
+
+    def update_gradients(self, dL_dF, X):
+        pass
+
+    def gradients_X(self, dL_dF, X):
+        return dL_dF

=====

@@ -1,9 +1,10 @@
 # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2015, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
 from ..core.mapping import Mapping
-import GPy
+from ..core import Param

 class Kernel(Mapping):
     """
@@ -11,50 +12,41 @@ class Kernel(Mapping):
     .. math::

-       f(\mathbf{x}*) = \mathbf{A}\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b}
+       f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})

-    :param X: input observations containing :math:`\mathbf{X}`
-    :type X: ndarray
+    or for multiple outputs
+
+    .. math::
+
+       f_i(\mathbf{x}) = \sum_j \alpha_{j,i} k(\mathbf{z}_j, \mathbf{x})
+
+    :param input_dim: dimension of input.
+    :type input_dim: int
     :param output_dim: dimension of output.
     :type output_dim: int
+    :param Z: input observations containing :math:`\mathbf{Z}`
+    :type Z: ndarray
     :param kernel: a GPy kernel, defaults to GPy.kern.RBF
     :type kernel: GPy.kern.kern
     """

-    def __init__(self, X, output_dim=1, kernel=None):
-        Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim)
-        if kernel is None:
-            kernel = GPy.kern.RBF(self.input_dim)
+    def __init__(self, input_dim, output_dim, Z, kernel, name='kernmap'):
+        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
         self.kern = kernel
-        self.X = X
-        self.num_data = X.shape[0]
-        self.num_params = self.output_dim*(self.num_data + 1)
-        self.A = np.array((self.num_data, self.output_dim))
-        self.bias = np.array(self.output_dim)
-        self.randomize()
-        self.name = 'kernel'
-
-    def _get_param_names(self):
-        return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]
-
-    def _get_params(self):
-        return np.hstack((self.A.flatten(), self.bias))
-
-    def _set_params(self, x):
-        self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
-        self.bias = x[self.num_data*self.output_dim:].copy()
-
-    def randomize(self):
-        self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
-        self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)
+        self.Z = Z
+        self.num_bases, Zdim = Z.shape
+        assert Zdim == self.input_dim
+        self.A = Param('A', np.random.randn(self.num_bases, self.output_dim))
+        self.add_parameter(self.A)

     def f(self, X):
-        return np.dot(self.kern.K(X, self.X), self.A) + self.bias
+        return np.dot(self.kern.K(X, self.Z), self.A)

-    def df_dtheta(self, dL_df, X):
-        self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
-        self._df_dbias = (dL_df.sum(0))
-        return np.hstack((self._df_dA.flatten(), self._df_dbias))
+    def update_gradients(self, dL_dF, X):
+        self.kern.update_gradients_full(np.dot(dL_dF, self.A.T), X, self.Z)
+        self.A.gradient = np.dot(self.kern.K(self.Z, X), dL_dF)

-    def df_dX(self, dL_df, X):
-        return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
+    def gradients_X(self, dL_dF, X):
+        return self.kern.gradients_X(np.dot(dL_dF, self.A.T), X, self.Z)
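The rewritten class is in effect a radial-basis-function network, f(x) = K(x, Z)A, with learnable output weights A and kernel hyperparameters shared with the rest of the model. A hedged construction sketch (the import path is an assumption; the commit does not show the file name):

    import numpy as np
    import GPy
    from GPy.mappings.kernel import Kernel

    Z = np.linspace(0, 10, 12)[:, None]  # 12 basis locations in 1d
    mapping = Kernel(input_dim=1, output_dim=1, Z=Z, kernel=GPy.kern.RBF(1))
    X = 10 * np.random.rand(5, 1)
    print(mapping.f(X).shape)  # (5, 1): K(X, Z) dot A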

=====

@@ -1,43 +1,39 @@
 # Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
+# Copyright (c) 2015, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-from ..core.mapping import Bijective_mapping
+from ..core.mapping import Mapping
 from ..core.parameterization import Param

-class Linear(Bijective_mapping):
+class Linear(Mapping):
     """
-    Mapping based on a linear model.
+    A Linear mapping.

     .. math::

-       f(\mathbf{x}*) = \mathbf{W}\mathbf{x}^* + \mathbf{b}
+       F(\mathbf{x}) = \mathbf{A}\mathbf{x}

-    :param X: input observations
-    :type X: ndarray
+    :param input_dim: dimension of input.
+    :type input_dim: int
     :param output_dim: dimension of output.
     :type output_dim: int
-    :param kernel: a GPy kernel, defaults to GPy.kern.RBF
-    :type kernel: GPy.kern.kern
     """

-    def __init__(self, input_dim=1, output_dim=1, name='linear'):
-        Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
-        self.W = Param('W', np.array((self.input_dim, self.output_dim)))
-        self.bias = Param('bias', np.array(self.output_dim))
-        self.link_parameters(self.W, self.bias)
+    def __init__(self, input_dim, output_dim, name='linmap'):
+        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
+        self.A = Param('A', np.random.randn(self.input_dim, self.output_dim))
+        self.add_parameter(self.A)

     def f(self, X):
-        return np.dot(X, self.W) + self.bias
+        return np.dot(X, self.A)

-    def g(self, f):
-        V = np.linalg.solve(np.dot(self.W.T, self.W), W.T)
-        return np.dot(f-self.bias, V)
+    def update_gradients(self, dL_dF, X):
+        self.A.gradient = np.dot(X.T, dL_dF)

-    def df_dtheta(self, dL_df, X):
-        df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
-        df_dbias = (dL_df.sum(0))
-        return np.hstack((df_dW.flatten(), df_dbias))
+    def gradients_X(self, dL_dF, X):
+        return np.dot(dL_dF, self.A.T)

-    def dL_dX(self, partial, X):
-        """The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
-        return (partial[:, None, :]*self.W[None, :, :]).sum(2)
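For F = XA with a loss L, the two gradients implemented above are just the chain rule:

.. math::

    \frac{\partial L}{\partial \mathbf{A}} = \mathbf{X}^\top \frac{\partial L}{\partial \mathbf{F}}, \qquad
    \frac{\partial L}{\partial \mathbf{X}} = \frac{\partial L}{\partial \mathbf{F}} \mathbf{A}^\top.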

=====

@@ -3,128 +3,40 @@

 import numpy as np
 from ..core.mapping import Mapping
+from ..core import Param

 class MLP(Mapping):
     """
-    Mapping based on a multi-layer perceptron neural network model.
+    Mapping based on a multi-layer perceptron neural network model, with a single hidden layer

-    .. math::
-
-       f(\\mathbf{x}*) = \\mathbf{W}^0\\boldsymbol{\\phi}(\\mathbf{W}^1\\mathbf{x}+\\mathbf{b}^1)^* + \\mathbf{b}^0
-
-    where
-
-    .. math::
-
-       \\phi(\\cdot) = \\text{tanh}(\\cdot)
-
-    :param X: input observations
-    :type X: ndarray
-    :param output_dim: dimension of output.
-    :type output_dim: int
-    :param hidden_dim: dimension of hidden layer. If it is an int, there is one hidden layer of the given dimension. If it is a list of ints there are as many hidden layers as the length of the list, each with the given number of hidden nodes in it.
-    :type hidden_dim: int or list of ints.
     """

-    def __init__(self, input_dim=1, output_dim=1, hidden_dim=3):
-        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
-        self.name = 'mlp'
-        if isinstance(hidden_dim, int):
-            hidden_dim = [hidden_dim]
+    def __init__(self, input_dim=1, output_dim=1, hidden_dim=3, name='mlpmap'):
+        Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
         self.hidden_dim = hidden_dim
-        self.activation = [None]*len(self.hidden_dim)
-        self.W = []
-        self._dL_dW = []
-        self.bias = []
-        self._dL_dbias = []
-        self.W.append(np.zeros((self.input_dim, self.hidden_dim[0])))
-        self._dL_dW.append(np.zeros((self.input_dim, self.hidden_dim[0])))
-        self.bias.append(np.zeros(self.hidden_dim[0]))
-        self._dL_dbias.append(np.zeros(self.hidden_dim[0]))
-        self.num_params = self.hidden_dim[0]*(self.input_dim+1)
-        for h1, h0 in zip(hidden_dim[1:], hidden_dim[0:-1]):
-            self.W.append(np.zeros((h0, h1)))
-            self._dL_dW.append(np.zeros((h0, h1)))
-            self.bias.append(np.zeros(h1))
-            self._dL_dbias.append(np.zeros(h1))
-            self.num_params += h1*(h0+1)
-        self.W.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
-        self._dL_dW.append(np.zeros((self.hidden_dim[-1], self.output_dim)))
-        self.bias.append(np.zeros(self.output_dim))
-        self._dL_dbias.append(np.zeros(self.output_dim))
-        self.num_params += self.output_dim*(self.hidden_dim[-1]+1)
-        self.randomize()
+        self.W1 = Param('W1', np.random.randn(self.input_dim, self.hidden_dim))
+        self.b1 = Param('b1', np.random.randn(self.hidden_dim))
+        self.W2 = Param('W2', np.random.randn(self.hidden_dim, self.output_dim))
+        self.b2 = Param('b2', np.random.randn(self.output_dim))

-    def _get_param_names(self):
-        return sum([['W%i_%i_%i' % (i, n, d) for n in range(self.W[i].shape[0]) for d in range(self.W[i].shape[1])] + ['bias%i_%i' % (i, d) for d in range(self.W[i].shape[1])] for i in range(len(self.W))], [])
-
-    def _get_params(self):
-        param = np.array([])
-        for W, bias in zip(self.W, self.bias):
-            param = np.hstack((param, W.flatten(), bias))
-        return param
-
-    def _set_params(self, x):
-        start = 0
-        for W, bias in zip(self.W, self.bias):
-            end = W.shape[0]*W.shape[1]+start
-            W[:] = x[start:end].reshape(W.shape[0], W.shape[1]).copy()
-            start = end
-            end = W.shape[1]+end
-            bias[:] = x[start:end].copy()
-            start = end
-
-    def randomize(self):
-        for W, bias in zip(self.W, self.bias):
-            W[:] = np.random.randn(W.shape[0], W.shape[1])/np.sqrt(W.shape[0]+1)
-            bias[:] = np.random.randn(W.shape[1])/np.sqrt(W.shape[0]+1)

     def f(self, X):
-        self._f_computations(X)
-        return np.dot(np.tanh(self.activation[-1]), self.W[-1]) + self.bias[-1]
+        activations = np.tanh(np.dot(X, self.W1) + self.b1)
+        self.out = np.dot(activations, self.W2) + self.b2
+        return self.out

-    def _f_computations(self, X):
-        W = self.W[0]
-        bias = self.bias[0]
-        self.activation[0] = np.dot(X, W) + bias
-        for W, bias, index in zip(self.W[1:-1], self.bias[1:-1], range(1, len(self.activation))):
-            self.activation[index] = np.dot(np.tanh(self.activation[index-1]), W)+bias
+    def update_gradients(self, dL_dF, X):
+        activations = np.tanh(np.dot(X, self.W1) + self.b1)

-    def df_dtheta(self, dL_df, X):
-        self._df_computations(dL_df, X)
-        g = np.array([])
-        for gW, gbias in zip(self._dL_dW, self._dL_dbias):
-            g = np.hstack((g, gW.flatten(), gbias))
-        return g
+        # Evaluate second-layer gradients.
+        self.W2.gradient = np.dot(activations.T, dL_dF)
+        self.b2.gradient = np.sum(dL_dF, 0)

-    def _df_computations(self, dL_df, X):
-        self._f_computations(X)
-        a0 = self.activation[-1]
-        W = self.W[-1]
-        self._dL_dW[-1] = (dL_df[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
-        dL_dta = (dL_df[:, None, :]*W[None, :, :]).sum(2)
-        self._dL_dbias[-1] = (dL_df.sum(0))
-        for dL_dW, dL_dbias, W, bias, a0, a1 in zip(self._dL_dW[-2:0:-1],
-                                                    self._dL_dbias[-2:0:-1],
-                                                    self.W[-2:0:-1],
-                                                    self.bias[-2:0:-1],
-                                                    self.activation[-2::-1],
-                                                    self.activation[-1:0:-1]):
-            ta = np.tanh(a1)
-            dL_da = dL_dta*(1-ta*ta)
-            dL_dW[:] = (dL_da[:, :, None]*np.tanh(a0[:, None, :])).sum(0).T
-            dL_dbias[:] = (dL_da.sum(0))
-            dL_dta = (dL_da[:, None, :]*W[None, :, :]).sum(2)
-        ta = np.tanh(self.activation[0])
-        dL_da = dL_dta*(1-ta*ta)
-        W = self.W[0]
-        self._dL_dW[0] = (dL_da[:, :, None]*X[:, None, :]).sum(0).T
-        self._dL_dbias[0] = (dL_da.sum(0))
-        self._dL_dX = (dL_da[:, None, :]*W[None, :, :]).sum(2)
+        # Backpropagate to the hidden layer (tanh' = 1 - tanh**2).
+        delta_hid = np.dot(dL_dF, self.W2.T) * (1.0 - activations**2)

-    def df_dX(self, dL_df, X):
-        self._df_computations(dL_df, X)
-        return self._dL_dX
+        # Finally, evaluate the first-layer gradients.
+        self.W1.gradient = np.dot(X.T, delta_hid)
+        self.b1.gradient = np.sum(delta_hid, 0)
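The update_gradients body is one step of standard backpropagation for a single tanh hidden layer: with hidden activations :math:`H = \tanh(XW_1 + b_1)`,

.. math::

    \frac{\partial L}{\partial W_2} = H^\top \frac{\partial L}{\partial F}, \qquad
    \Delta = \Big(\frac{\partial L}{\partial F} W_2^\top\Big) \odot (1 - H^2), \qquad
    \frac{\partial L}{\partial W_1} = X^\top \Delta,

where :math:`1 - H^2` is the tanh derivative and the bias gradients are the column sums of :math:`\partial L/\partial F` and :math:`\Delta` respectively.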

=====

@@ -0,0 +1,94 @@
+from GPy.core.mapping import Mapping
+from GPy.core import Param
+import numpy as np
+
+class PiecewiseLinear(Mapping):
+    """
+    A piecewise-linear mapping.
+
+    The parameters of this mapping are the positions and values of the function
+    where it is broken (self.breaks, self.values). Outside the range of the
+    breaks, the function is assumed to have gradient 1.
+    """
+    def __init__(self, input_dim, output_dim, values, breaks, name='piecewise_linear'):
+        assert input_dim==1
+        assert output_dim==1
+        Mapping.__init__(self, input_dim, output_dim, name)
+        values, breaks = np.array(values).flatten(), np.array(breaks).flatten()
+        assert values.size == breaks.size
+        self.values = Param('values', values)
+        self.breaks = Param('breaks', breaks)
+        self.link_parameter(self.values)
+        self.link_parameter(self.breaks)
+
+    def parameters_changed(self):
+        self.order = np.argsort(self.breaks)*1
+        self.reverse_order = np.zeros_like(self.order)
+        self.reverse_order[self.order] = np.arange(self.order.size)
+        self.sorted_breaks = self.breaks[self.order]
+        self.sorted_values = self.values[self.order]
+        self.grads = np.diff(self.sorted_values)/np.diff(self.sorted_breaks)
+
+    def f(self, X):
+        x = X.flatten()
+        y = x.copy()
+        #first adjust the points below the first break
+        y[x<self.sorted_breaks[0]] = x[x<self.sorted_breaks[0]] + self.sorted_values[0] - self.sorted_breaks[0]
+        #now all the points past the last break
+        y[x>self.sorted_breaks[-1]] = x[x>self.sorted_breaks[-1]] + self.sorted_values[-1] - self.sorted_breaks[-1]
+        #loop through the pairs of breaks
+        for low, up, g, v in zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
+            i = np.logical_and(x>low, x<up)
+            y[i] = v + (x[i]-low)*g
+        return y.reshape(-1, 1)
+
+    def update_gradients(self, dL_dF, X):
+        x = X.flatten()
+        dL_dF = dL_dF.flatten()
+        dL_db = np.zeros(self.sorted_breaks.size)
+        dL_dv = np.zeros(self.sorted_values.size)
+        #loop across each interval, computing the gradient for each of the 4 parameters that define it
+        for i, (low, up, g, v) in enumerate(zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1])):
+            index = np.logical_and(x>low, x<up)
+            xx = x[index]
+            grad = dL_dF[index]
+            span = up-low
+            dL_dv[i] += np.sum(grad*((low - xx)/span + 1))
+            dL_dv[i+1] += np.sum(grad*(xx-low)/span)
+            dL_db[i] += np.sum(grad*g*(xx-up)/span)
+            dL_db[i+1] += np.sum(grad*g*(low-xx)/span)
+        #now the end parts
+        dL_db[0] -= np.sum(dL_dF[x<self.sorted_breaks[0]])
+        dL_db[-1] -= np.sum(dL_dF[x>self.sorted_breaks[-1]])
+        dL_dv[0] += np.sum(dL_dF[x<self.sorted_breaks[0]])
+        dL_dv[-1] += np.sum(dL_dF[x>self.sorted_breaks[-1]])
+        #now put the gradients back in the correct order!
+        self.breaks.gradient = dL_db[self.reverse_order]
+        self.values.gradient = dL_dv[self.reverse_order]
+
+    def gradients_X(self, dL_dF, X):
+        x = X.flatten()
+        #outside the range of the breakpoints, the function is just offset by a constant, so the partial derivative is 1.
+        dL_dX = dL_dF.copy().flatten()
+        #inside the breakpoints, the partial derivative is self.grads
+        for low, up, g, v in zip(self.sorted_breaks[:-1], self.sorted_breaks[1:], self.grads, self.sorted_values[:-1]):
+            i = np.logical_and(x>low, x<up)
+            dL_dX[i] = dL_dX[i]*g
+        return dL_dX.reshape(-1, 1)
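A quick numerical sketch of the new mapping (the module path is an assumption; the commit does not show the file name):

    import numpy as np
    from GPy.mappings.piecewise_linear import PiecewiseLinear

    pw = PiecewiseLinear(1, 1, values=[0., 2.], breaks=[0., 1.])
    X = np.array([[-1.0], [0.5], [2.0]])
    print(pw.f(X))  # [[-1.], [1.], [3.]]: unit slope outside the breaks, slope 2 between them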

=====

@@ -5,6 +5,30 @@
 import unittest
 import numpy as np
 import GPy

+class MappingGradChecker(GPy.core.Model):
+    """
+    This class has everything we need to check the gradient of a mapping. It
+    implements a simple likelihood which is the sum of the outputs of the
+    mapping. The gradients are checked against the parameters of the mapping
+    and the input.
+    """
+    def __init__(self, mapping, X, name):
+        super(MappingGradChecker, self).__init__(name)
+        self.mapping = mapping
+        self.add_parameter(self.mapping)
+        self.X = GPy.core.Param('X', X)
+        self.add_parameter(self.X)
+        self.dL_dY = np.ones((self.X.shape[0], self.mapping.output_dim))
+
+    def log_likelihood(self):
+        return np.sum(self.mapping.f(self.X))
+
+    def parameters_changed(self):
+        self.X.gradient = self.mapping.gradients_X(self.dL_dY, self.X)
+        self.mapping.update_gradients(self.dL_dY, self.X)

 class MappingTests(unittest.TestCase):
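A hedged sketch of how the checker would be used inside a test, with the Linear mapping as redefined in this commit:

    import numpy as np
    import GPy

    mapping = GPy.mappings.Linear(input_dim=2, output_dim=1)
    X = np.random.randn(10, 2)
    m = MappingGradChecker(mapping, X, name='check_linear')
    assert m.checkgrad()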

=====

@@ -15,4 +15,4 @@ import netpbmfile
 import inference_plots
 import maps
 import img_plots
-from ssgplvm import SSGPLVM_plot
\ No newline at end of file
+from ssgplvm import SSGPLVM_plot

=====
GPy/testing/svgp_tests.py (new file, 34 lines)

@@ -0,0 +1,34 @@
+import numpy as np
+import scipy as sp
+import GPy
+
+class SVGP_nonconvex(np.testing.TestCase):
+    """
+    Inference in the SVGP with a student-T likelihood
+    """
+    def setUp(self):
+        X = np.linspace(0, 10, 100).reshape(-1, 1)
+        Z = np.linspace(0, 10, 10).reshape(-1, 1)
+        Y = np.sin(X) + np.random.randn(*X.shape)*0.1
+        Y[50] += 3
+        lik = GPy.likelihoods.StudentT(deg_free=2)
+        k = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6)
+        self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k)
+
+    def test_grad(self):
+        assert self.m.checkgrad(step=1e-4)
+
+class SVGP_classification(np.testing.TestCase):
+    """
+    Inference in the SVGP with a Bernoulli likelihood
+    """
+    def setUp(self):
+        X = np.linspace(0, 10, 100).reshape(-1, 1)
+        Z = np.linspace(0, 10, 10).reshape(-1, 1)
+        Y = np.where((np.sin(X) + np.random.randn(*X.shape)*0.1) > 0, 1, 0)
+        lik = GPy.likelihoods.Bernoulli()
+        k = GPy.kern.RBF(1, lengthscale=5.) + GPy.kern.White(1, 1e-6)
+        self.m = GPy.core.SVGP(X, Y, Z=Z, likelihood=lik, kernel=k)
+
+    def test_grad(self):
+        assert self.m.checkgrad(step=1e-4)