Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-03 00:32:39 +02:00
[updates] starting to extract out standalone modules
Commit: 9b2e49c949
19 changed files with 563 additions and 958 deletions
@@ -8,5 +8,4 @@ from parameterization.observable_array import ObsAr
from gp import GP
from sparse_gp import SparseGP
from svigp import SVIGP
from mapping import *
131 GPy/core/gp.py
@@ -3,16 +3,12 @@
import numpy as np
import sys
import warnings
from .. import kern
from ..util.linalg import dtrtrs
from model import Model
from parameterization import ObsAr
from .. import likelihoods
from ..likelihoods.gaussian import Gaussian
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from parameterization.variational import VariationalPosterior
from scipy.sparse.base import issparse

import logging
from GPy.util.normalizer import MeanNorm
@@ -26,7 +22,7 @@ class GP(Model):
:param Y: output observations
:param kernel: a GPy kernel, defaults to rbf+white
:param likelihood: a GPy likelihood
:param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP
:param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
:rtype: model object
:param Norm normalizer:
    normalize the outputs Y.
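A minimal construction sketch in the style of the toy_heaviside example further down in this commit (the GPy.likelihoods.Bernoulli and EP class names are taken from that example; treat this as illustrative rather than the canonical constructor call):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (40, 1))
Y = (np.sin(X) > 0.).astype(float)          # binary targets in {0, 1}

kernel = GPy.kern.RBF(1)
likelihood = GPy.likelihoods.Bernoulli()
ep = GPy.inference.latent_function_inference.expectation_propagation.EP()

m = GPy.core.GP(X=X, Y=Y, kernel=kernel, likelihood=likelihood,
                inference_method=ep, name='gp_classification')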
@@ -95,10 +91,13 @@ class GP(Model):
def set_XY(self, X=None, Y=None):
"""
Set the input / output of the model
Set the input / output data of the model
This is useful if we wish to change our existing data but maintain the same model

:param X: input observations
:type X: np.ndarray
:param Y: output observations
:type Y: np.ndarray
"""
self.update_model(False)
if Y is not None:
@@ -112,7 +111,6 @@ class GP(Model):
if X is not None:
if self.X in self.parameters:
# LVM models
from ..core.parameterization.variational import VariationalPosterior
if isinstance(self.X, VariationalPosterior):
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
self.unlink_parameter(self.X)
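A minimal usage sketch for set_XY (assuming a standard GPRegression model; the point is that the data are swapped while the fitted kernel and likelihood parameters are kept):

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
m = GPy.models.GPRegression(X, Y)
m.optimize()

# replace the training data but keep the fitted hyperparameters
X2 = np.random.uniform(-3., 3., (50, 1))
Y2 = np.sin(X2) + np.random.randn(50, 1) * 0.05
m.set_XY(X2, Y2)
print(m.log_likelihood())   # recomputed for the new data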
@@ -129,22 +127,39 @@ class GP(Model):
def set_X(self, X):
"""
Set the input of the model
Set the input data of the model

:param X: input observations
:type X: np.ndarray
"""
self.set_XY(X=X)

def set_Y(self, Y):
"""
Set the input of the model
Set the output data of the model

:param Y: output observations
:type Y: np.ndarray
"""
self.set_XY(Y=Y)

def parameters_changed(self):
"""
Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
In particular, in the GP class this method re-performs inference, recalculating the posterior, the log marginal likelihood and the gradients of the model.

.. warning::
    This method is not designed to be called manually; the framework is set up to call it automatically upon changes to parameters. If you call
    this method yourself, there may be unexpected consequences.
"""
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)

def log_likelihood(self):
"""
The log marginal likelihood of the model, :math:`p(\mathbf{y})`; this is the objective function of the model being optimised.
"""
return self._log_marginal_likelihood

def _raw_predict(self, _Xnew, full_cov=False, kern=None):
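As the warning above says, parameters_changed is invoked by the parameterization framework rather than by the user. A small sketch of the consequence (continuing the GPRegression model `m` from the previous example; the regex-style parameter indexing is the same as used in the examples later in this commit):

ll_before = m.log_likelihood()
m['.*lengthscale'] = 2.0          # assignment triggers parameters_changed() internally
ll_after = m.log_likelihood()     # posterior and marginal likelihood already refreshed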
@@ -155,12 +170,10 @@ class GP(Model):
of the prediction is computed. If full_cov is False (default), only the
diagonal of the covariance is returned.

$$
p(f*|X*, X, Y) = \int^{\infty}_{-\infty} p(f*|f, X*) p(f|X, Y) df
               = N(f* | K_{x*x}(K_{xx} + \Sigma)^{-1} Y, K_{x*x*} - K_{x*x}(K_{xx} + \Sigma)^{-1} K_{xx*})
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$

.. math::
    p(f*|X*, X, Y) = \int^{\infty}_{-\infty} p(f*|f, X*) p(f|X, Y) df
                   = N(f* | K_{x*x}(K_{xx} + \Sigma)^{-1} Y, K_{x*x*} - K_{x*x}(K_{xx} + \Sigma)^{-1} K_{xx*})
    \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
"""
if kern is None:
kern = self.kern
@@ -185,23 +198,20 @@ class GP(Model):
Predict the function(s) at the new point(s) Xnew.

:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:type Xnew: np.ndarray (Nnew x self.input_dim)
:param full_cov: whether to return the full covariance matrix, or just
    the diagonal
:type full_cov: bool
:param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model
    kern). This is useful for examining e.g. subprocesses.
:returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
:returns: var: posterior variance, a Numpy array, Nnew x 1 if
    full_cov=False, Nnew x Nnew otherwise
:returns: lower and upper boundaries of the 95% confidence intervals,
    Numpy arrays, Nnew x self.input_dim

:returns: (mean, var, lower_upper):
    mean: posterior mean, a Numpy array, Nnew x self.input_dim
    var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
    lower_upper: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim

If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.

"""
# predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
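A hedged prediction sketch for the return values documented above (the exact return signature of predict has shifted between GPy versions, so only the first two outputs are unpacked here):

Xnew = np.linspace(-3., 3., 100)[:, None]
out = m.predict(Xnew)                              # (mean, var, ...) per the docstring above
mean, var = out[0], out[1]                         # mean: (100, 1), var: (100, 1) with full_cov=False
_, cov = m.predict(Xnew, full_cov=True)[:2]        # cov: (100, 100) for a single-output model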
@@ -213,6 +223,16 @@ class GP(Model):
return mean, var

def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
"""
Get the predictive quantiles around the prediction at X

:param X: The points at which to make a prediction
:type X: np.ndarray (Xnew x self.input_dim)
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
:type quantiles: tuple
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
"""
m, v = self._raw_predict(X, full_cov=False)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
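A short usage sketch for the quantile interface above (assumes the same fitted model `m`; predict_quantiles returns one array per requested quantile):

lower, upper = m.predict_quantiles(Xnew)                             # default (2.5, 97.5): a 95% interval
q10, q50, q90 = m.predict_quantiles(Xnew, quantiles=(10., 50., 90.))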
@@ -225,7 +245,12 @@ class GP(Model):
Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized:
    dmu_dX* -- [N*, Q, D], where D is the number of outputs in this GP (usually one).

    dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q, D), np.ndarray (N*, Q)]

"""
dmu_dX = np.empty((Xnew.shape[0], Xnew.shape[1], self.output_dim))
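A shape-oriented sketch of the gradients described above (assuming the fitted 1-D model `m`, so Q = 1 and D = 1, and Xnew with 100 rows):

dmu_dX, dv_dX = m.predictive_gradients(Xnew)
# dmu_dX: (100, 1, 1)  -- d mean / d X*, one slice per output
# dv_dX:  (100, 1)     -- d variance / d X*, shared across outputs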
@@ -245,12 +270,13 @@ class GP(Model):
Samples the posterior GP at the points X.

:param X: The points at which to take the samples.
:type X: np.ndarray, Nnew x self.input_dim.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
:returns: Ysim: set of simulations
:rtype: np.ndarray (N x samples)
"""
m, v = self._raw_predict(X, full_cov=full_cov)
if self.normalizer is not None:
@@ -268,7 +294,7 @@ class GP(Model):
Samples the posterior GP at the points X.

:param X: the points at which to take the samples.
:type X: np.ndarray, Nnew x self.input_dim.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
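A sampling sketch for the two methods whose docstrings appear above (the method names posterior_samples_f and posterior_samples are assumed here; only their docstrings are visible in this hunk):

F = m.posterior_samples_f(Xnew, size=10, full_cov=True)   # latent-function draws, (N x samples)
Y_sim = m.posterior_samples(Xnew, size=10)                # draws pushed through the likelihood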
@@ -292,6 +318,37 @@ class GP(Model):
This is a call to plot with plot_raw=True.
Data will not be plotted in this, as the GP's view of the world
may live in another space, or in other units, than the data.

Can plot only part of the data and part of the posterior functions
using which_data_rows and which_data_ycols.

:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use; if ax is None, create a new figure
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param Y_metadata: additional data associated with Y which may be needed
:type Y_metadata: dict
:param data_symbol: symbol as used in matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
@@ -325,12 +382,13 @@ class GP(Model):
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_rows: 'all' or a list of integers
:type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use; if ax is None, create a new figure
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
@@ -338,11 +396,14 @@ class GP(Model):
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol:
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:param levels: for 2D plotting, the number of contour levels to use; if ax is None, create a new figure
:type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param Y_metadata: additional data associated with Y which may be needed
:type Y_metadata: dict
:param data_symbol: symbol as used in matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
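A hedged plotting sketch for the options documented above (assumes matplotlib is available, `m` is the 1-D model from earlier, and `m2` stands for some fitted model with two inputs):

m.plot()                                          # data plus posterior mean and 95% band
m.plot_f()                                        # latent function only (plot with plot_raw=True)
m2.plot(fixed_inputs=[(1, 0.)], resolution=200)   # slice a 2-input model with input 1 fixed at 0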
@@ -372,10 +433,8 @@ class GP(Model):
:type max_f_eval: int
:param messages: whether to display messages during optimisation
:type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred_optimizer)
:param optimizer: which optimizer to use (defaults to self.preferred_optimizer); a range of optimisers can be found in :module:`~GPy.inference.optimization`, including 'scg', 'lbfgs' and 'tnc'.
:type optimizer: string

TODO: valid args
"""
self.inference_method.on_optimization_start()
try:
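A minimal optimisation sketch using the optimizer names listed above ('scg', 'lbfgs', 'tnc'); optimize_restarts is the multi-start variant used in the tutorial code further down in this commit:

m.optimize('lbfgs', max_iters=200, messages=True)
m.optimize_restarts(num_restarts=5)    # keep the best of several random restarts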
@@ -394,7 +453,11 @@ class GP(Model):
:param optimize: whether to optimize the location of new X (True by default)
:type optimize: boolean
:return: a tuple containing the posterior estimation of X and the model that optimizes X
<<<<<<< HEAD
:rtype: (GPy.core.parameterization.variational.VariationalPosterior or numpy.ndarray, GPy.core.Model)
=======
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
>>>>>>> 22d30d9d39c70f806fe5bcb815cce9c8eb0f8dca
"""
from ..inference.latent_function_inference.inferenceX import infer_newX
return infer_newX(self, Y_new, optimize=optimize)
@@ -234,9 +234,9 @@ class Model(Parameterized):
"""
if self.is_fixed:
raise RuntimeError, "Cannot optimize, when everything is fixed"
print 'nothing to optimize'
if self.size == 0:
raise RuntimeError, "Model without parameters cannot be optimized"
print 'nothing to optimize'

if start == None:
start = self.optimizer_array
@@ -47,7 +47,7 @@ class SparseGP_MPI(SparseGP):
if variational_prior is not None:
self.link_parameter(variational_prior)

self.mpi_comm = mpi_comm
# Manage the data (Y) division
if mpi_comm != None:

@@ -60,7 +60,6 @@ class SparseGP_MPI(SparseGP):
mpi_comm.Bcast(self.param_array, root=0)
self.update_model(True)

def __getstate__(self):
dc = super(SparseGP_MPI, self).__getstate__()
dc['mpi_comm'] = None
@@ -1,434 +0,0 @@
# Copyright (c) 2012, James Hensman and Nicolo' Fusi
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
|
||||
from gp import GP
|
||||
import time
|
||||
import sys
|
||||
|
||||
|
||||
class SVIGP(GP):
|
||||
"""
|
||||
|
||||
Stochastic Variational inference in a Gaussian Process
|
||||
|
||||
:param X: inputs
|
||||
:type X: np.ndarray (N x Q)
|
||||
:param Y: observed data
|
||||
:type Y: np.ndarray of observations (N x D)
|
||||
:param batchsize: the size of a batch
|
||||
|
||||
Additional kwargs are used as for a sparse GP. They include:
|
||||
|
||||
:param q_u: canonical parameters of the distribution squashed into a 1D array
|
||||
:type q_u: np.ndarray
|
||||
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:type M: int
|
||||
:param kernel: the kernel/covariance function. See link kernels
|
||||
:type kernel: a GPy kernel
|
||||
:param Z: inducing inputs (optional, see note)
|
||||
:type Z: np.ndarray (M x Q) | None
|
||||
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
|
||||
:type X_uncertainty: np.ndarray (N x Q) | None
|
||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
||||
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||
:type M: int
|
||||
:param beta: noise precision. TODO: ignore beta if doing EP
|
||||
:type beta: float
|
||||
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
|
||||
:type normalize_(X|Y): bool
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, X, Y, kernel, Z, q_u=None, batchsize=10):
|
||||
raise NotImplementedError, "This is a work in progress, see github issue "
|
||||
GP.__init__(self, X, Y, kernel)
|
||||
self.batchsize=batchsize
|
||||
self.Z = Z
|
||||
self.num_inducing = Z.shape[0]
|
||||
|
||||
self.batchcounter = 0
|
||||
self.epochs = 0
|
||||
self.iterations = 0
|
||||
|
||||
self.vb_steplength = 0.05
|
||||
self.param_steplength = 1e-5
|
||||
self.momentum = 0.9
|
||||
|
||||
if q_u is None:
|
||||
q_u = np.hstack((np.random.randn(self.num_inducing*self.output_dim),-.5*np.eye(self.num_inducing).flatten()))
|
||||
|
||||
self.set_vb_param(q_u)
|
||||
|
||||
self._permutation = np.random.permutation(self.num_data)
|
||||
self.load_batch()
|
||||
|
||||
self._param_trace = []
|
||||
self._ll_trace = []
|
||||
self._grad_trace = []
|
||||
|
||||
#set the adaptive steplength parameters
|
||||
#self.hbar_t = 0.0
|
||||
#self.tau_t = 100.0
|
||||
#self.gbar_t = 0.0
|
||||
#self.gbar_t1 = 0.0
|
||||
#self.gbar_t2 = 0.0
|
||||
#self.hbar_tp = 0.0
|
||||
#self.tau_tp = 10000.0
|
||||
#self.gbar_tp = 0.0
|
||||
#self.adapt_param_steplength = True
|
||||
#self.adapt_vb_steplength = True
|
||||
#self._param_steplength_trace = []
|
||||
#self._vb_steplength_trace = []
|
||||
|
||||
def _compute_kernel_matrices(self):
|
||||
# kernel computations, using BGPLVM notation
|
||||
self.Kmm = self.kern.K(self.Z)
|
||||
self.psi0 = self.kern.Kdiag(self.X_batch)
|
||||
self.psi1 = self.kern.K(self.X_batch, self.Z)
|
||||
self.psi2 = None
|
||||
|
||||
def dL_dtheta(self):
|
||||
dL_dtheta = self.kern._param_grad_helper(self.dL_dKmm, self.Z)
|
||||
if self.has_uncertain_inputs:
|
||||
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
|
||||
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
|
||||
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
|
||||
else:
|
||||
dL_dtheta += self.kern._param_grad_helper(self.dL_dpsi1, self.X_batch, self.Z)
|
||||
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
|
||||
return dL_dtheta
|
||||
|
||||
def _set_params(self, p, computations=True):
|
||||
self.kern._set_params_transformed(p[:self.kern.num_params])
|
||||
self.likelihood._set_params(p[self.kern.num_params:])
|
||||
if computations:
|
||||
self._compute_kernel_matrices()
|
||||
self._computations()
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.kern._get_params_transformed() , self.likelihood._get_params()))
|
||||
|
||||
def _get_param_names(self):
|
||||
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
|
||||
|
||||
def load_batch(self):
|
||||
"""
|
||||
load a batch of data (set self.X_batch and self.Y_batch from self.X, self.Y)
|
||||
"""
|
||||
|
||||
#if we've seen all the data, start again with them in a new random order
|
||||
if self.batchcounter+self.batchsize > self.num_data:
|
||||
self.batchcounter = 0
|
||||
self.epochs += 1
|
||||
self._permutation = np.random.permutation(self.num_data)
|
||||
|
||||
this_perm = self._permutation[self.batchcounter:self.batchcounter+self.batchsize]
|
||||
|
||||
self.X_batch = self.X[this_perm]
|
||||
self.Y_batch = self.Y[this_perm]
|
||||
|
||||
self.batchcounter += self.batchsize
|
||||
|
||||
self.data_prop = float(self.batchsize)/self.num_data
|
||||
|
||||
self._compute_kernel_matrices()
|
||||
self._computations()
|
||||
|
||||
def _computations(self,do_Kmm=True, do_Kmm_grad=True):
|
||||
"""
|
||||
All of the computations needed. Some are optional, see kwargs.
|
||||
"""
|
||||
|
||||
if do_Kmm:
|
||||
self.Lm = jitchol(self.Kmm)
|
||||
|
||||
# The rather complex computations of self.A
|
||||
if self.has_uncertain_inputs:
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.batchsize, 1, 1))).sum(0)
|
||||
else:
|
||||
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
|
||||
evals, evecs = np.linalg.eigh(psi2_beta)
|
||||
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
|
||||
tmp = evecs * np.sqrt(clipped_evals)
|
||||
else:
|
||||
if self.likelihood.is_heteroscedastic:
|
||||
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.batchsize)))
|
||||
else:
|
||||
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision))
|
||||
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
|
||||
self.A = tdot(tmp)
|
||||
|
||||
self.V = self.likelihood.precision*self.likelihood.Y
|
||||
self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
|
||||
self.psi1V = np.dot(self.psi1.T, self.V)
|
||||
|
||||
self.B = np.eye(self.num_inducing)*self.data_prop + self.A
|
||||
self.Lambda = backsub_both_sides(self.Lm, self.B.T)
|
||||
self.LQL = backsub_both_sides(self.Lm,self.q_u_expectation[1].T,transpose='right')
|
||||
|
||||
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.likelihood.precision
|
||||
self.Kmmi_m, _ = dpotrs(self.Lm, self.q_u_expectation[0], lower=1)
|
||||
self.projected_mean = np.dot(self.psi1,self.Kmmi_m)
|
||||
|
||||
# Compute dL_dpsi
|
||||
self.dL_dpsi0 = - 0.5 * self.output_dim * self.likelihood.precision * np.ones(self.batchsize)
|
||||
self.dL_dpsi1, _ = dpotrs(self.Lm,np.asfortranarray(self.VmT.T),lower=1)
|
||||
self.dL_dpsi1 = self.dL_dpsi1.T
|
||||
|
||||
dL_dpsi2 = -0.5 * self.likelihood.precision * backsub_both_sides(self.Lm, self.LQL - self.output_dim * np.eye(self.num_inducing))
|
||||
if self.has_uncertain_inputs:
|
||||
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.batchsize,axis=0)
|
||||
else:
|
||||
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1.T).T
|
||||
self.dL_dpsi2 = None
|
||||
|
||||
# Compute dL_dKmm
|
||||
if do_Kmm_grad:
|
||||
tmp = np.dot(self.LQL,self.A) - backsub_both_sides(self.Lm,np.dot(self.q_u_expectation[0],self.psi1V.T),transpose='right')
|
||||
tmp += tmp.T
|
||||
tmp += -self.output_dim*self.B
|
||||
tmp += self.data_prop*self.LQL
|
||||
self.dL_dKmm = 0.5*backsub_both_sides(self.Lm,tmp)
|
||||
|
||||
#Compute the gradient of the log likelihood wrt noise variance
|
||||
self.partial_for_likelihood = -0.5*(self.batchsize*self.output_dim - np.sum(self.A*self.LQL))*self.likelihood.precision
|
||||
self.partial_for_likelihood += (0.5*self.output_dim*self.trace_K + 0.5 * self.likelihood.trYYT - np.sum(self.likelihood.Y*self.projected_mean))*self.likelihood.precision**2
|
||||
|
||||
|
||||
def log_likelihood(self):
|
||||
"""
|
||||
As for uncollapsed sparse GP, but account for the proportion of data we're looking at right now.
|
||||
|
||||
NB. self.batchsize is the size of the batch, not the size of X_all
|
||||
"""
|
||||
assert not self.likelihood.is_heteroscedastic
|
||||
A = -0.5*self.batchsize*self.output_dim*(np.log(2.*np.pi) - np.log(self.likelihood.precision))
|
||||
B = -0.5*self.likelihood.precision*self.output_dim*self.trace_K
|
||||
Kmm_logdet = 2.*np.sum(np.log(np.diag(self.Lm)))
|
||||
C = -0.5*self.output_dim*self.data_prop*(Kmm_logdet-self.q_u_logdet - self.num_inducing)
|
||||
C += -0.5*np.sum(self.LQL * self.B)
|
||||
D = -0.5*self.likelihood.precision*self.likelihood.trYYT
|
||||
E = np.sum(self.V*self.projected_mean)
|
||||
return (A+B+C+D+E)/self.data_prop
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
return np.hstack((self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))/self.data_prop
|
||||
|
||||
def vb_grad_natgrad(self):
|
||||
"""
|
||||
Compute the gradients of the lower bound wrt the canonical and
|
||||
Expectation parameters of u.
|
||||
|
||||
Note that the natural gradient in either is given by the gradient in
|
||||
the other (See Hensman et al 2012 Fast Variational inference in the
|
||||
conjugate exponential Family)
|
||||
"""
|
||||
|
||||
# Gradient for eta
|
||||
dL_dmmT_S = -0.5*self.Lambda/self.data_prop + 0.5*self.q_u_prec
|
||||
Kmmipsi1V,_ = dpotrs(self.Lm,self.psi1V,lower=1)
|
||||
dL_dm = (Kmmipsi1V - np.dot(self.Lambda,self.q_u_mean))/self.data_prop
|
||||
|
||||
# Gradients for theta
|
||||
S = self.q_u_cov
|
||||
Si = self.q_u_prec
|
||||
m = self.q_u_mean
|
||||
dL_dSi = -mdot(S,dL_dmmT_S, S)
|
||||
|
||||
dL_dmhSi = -2*dL_dSi
|
||||
dL_dSim = np.dot(dL_dSi,m) + np.dot(Si, dL_dm)
|
||||
|
||||
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) , np.hstack((dL_dSim.flatten(), dL_dmhSi.flatten()))
|
||||
|
||||
|
||||
def optimize(self, iterations, print_interval=10, callback=lambda:None, callback_interval=5):
|
||||
|
||||
param_step = 0.
|
||||
|
||||
#Iterate!
|
||||
for i in range(iterations):
|
||||
|
||||
#store the current configuration for plotting later
|
||||
self._param_trace.append(self._get_params())
|
||||
self._ll_trace.append(self.log_likelihood() + self.log_prior())
|
||||
|
||||
#load a batch
|
||||
self.load_batch()
|
||||
|
||||
#compute the (stochastic) gradient
|
||||
natgrads = self.vb_grad_natgrad()
|
||||
grads = self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
self._grad_trace.append(grads)
|
||||
|
||||
#compute the steps in all parameters
|
||||
vb_step = self.vb_steplength*natgrads[0]
|
||||
if (self.epochs>=1):#only move the parameters after the first epoch
|
||||
param_step = self.momentum*param_step + self.param_steplength*grads
|
||||
else:
|
||||
param_step = 0.
|
||||
|
||||
self.set_vb_param(self.get_vb_param() + vb_step)
|
||||
#Note: don't recompute everything here, wait until the next iteration when we have a new batch
|
||||
self._set_params(self._untransform_params(self._get_params_transformed() + param_step), computations=False)
|
||||
|
||||
#print messages if desired
|
||||
if i and (not i%print_interval):
|
||||
print i, np.mean(self._ll_trace[-print_interval:]) #, self.log_likelihood()
|
||||
print np.round(np.mean(self._grad_trace[-print_interval:],0),3)
|
||||
sys.stdout.flush()
|
||||
|
||||
#callback
|
||||
if i and not i%callback_interval:
|
||||
callback(self) # Change this to callback()
|
||||
time.sleep(0.01)
|
||||
|
||||
if self.epochs > 10:
|
||||
self._adapt_steplength()
|
||||
|
||||
self.iterations += 1
|
||||
|
||||
|
||||
def _adapt_steplength(self):
|
||||
if self.adapt_vb_steplength:
|
||||
# self._adaptive_vb_steplength()
|
||||
self._adaptive_vb_steplength_KL()
|
||||
self._vb_steplength_trace.append(self.vb_steplength)
|
||||
assert self.vb_steplength > 0
|
||||
|
||||
if self.adapt_param_steplength:
|
||||
self._adaptive_param_steplength()
|
||||
# self._adaptive_param_steplength_log()
|
||||
# self._adaptive_param_steplength_from_vb()
|
||||
self._param_steplength_trace.append(self.param_steplength)
|
||||
|
||||
def _adaptive_param_steplength(self):
|
||||
decr_factor = 0.02
|
||||
g_tp = self._transform_gradients(self._log_likelihood_gradients())
|
||||
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
|
||||
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
|
||||
new_param_steplength = np.dot(self.gbar_tp.T, self.gbar_tp) / self.hbar_tp
|
||||
#- hack
|
||||
new_param_steplength *= decr_factor
|
||||
self.param_steplength = (self.param_steplength + new_param_steplength)/2
|
||||
#-
|
||||
self.tau_tp = self.tau_tp*(1-self.param_steplength) + 1
|
||||
|
||||
def _adaptive_param_steplength_log(self):
|
||||
stp = np.logspace(np.log(0.0001), np.log(1e-6), base=np.e, num=18000)
|
||||
self.param_steplength = stp[self.iterations]
|
||||
|
||||
def _adaptive_param_steplength_log2(self):
|
||||
self.param_steplength = (self.iterations + 0.001)**-0.5
|
||||
|
||||
def _adaptive_param_steplength_from_vb(self):
|
||||
self.param_steplength = self.vb_steplength * 0.01
|
||||
|
||||
def _adaptive_vb_steplength(self):
|
||||
decr_factor = 0.1
|
||||
g_t = self.vb_grad_natgrad()[0]
|
||||
self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t
|
||||
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t)
|
||||
new_vb_steplength = np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t
|
||||
#- hack
|
||||
new_vb_steplength *= decr_factor
|
||||
self.vb_steplength = (self.vb_steplength + new_vb_steplength)/2
|
||||
#-
|
||||
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
|
||||
|
||||
def _adaptive_vb_steplength_KL(self):
|
||||
decr_factor = 0.1
|
||||
natgrad = self.vb_grad_natgrad()
|
||||
g_t1 = natgrad[0]
|
||||
g_t2 = natgrad[1]
|
||||
self.gbar_t1 = (1-1/self.tau_t)*self.gbar_t1 + 1/self.tau_t * g_t1
|
||||
self.gbar_t2 = (1-1/self.tau_t)*self.gbar_t2 + 1/self.tau_t * g_t2
|
||||
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t1.T, g_t2)
|
||||
self.vb_steplength = np.dot(self.gbar_t1.T, self.gbar_t2) / self.hbar_t
|
||||
self.vb_steplength *= decr_factor
|
||||
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
|
||||
|
||||
def _raw_predict(self, X_new, X_variance_new=None, which_parts='all',full_cov=False):
|
||||
"""Internal helper function for making predictions, does not account for normalization"""
|
||||
|
||||
#TODO: make this more efficient!
|
||||
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
|
||||
tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
|
||||
|
||||
if X_variance_new is None:
|
||||
Kx = self.kern.K(X_new,self.Z)
|
||||
mu = np.dot(Kx,self.Kmmi_m)
|
||||
if full_cov:
|
||||
Kxx = self.kern.K(X_new)
|
||||
var = Kxx - mdot(Kx,tmp,Kx.T)
|
||||
else:
|
||||
Kxx = self.kern.Kdiag(X_new)
|
||||
var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
|
||||
return mu, var
|
||||
else:
|
||||
assert X_variance_new.shape == X_new.shape
|
||||
Kx = self.kern.psi1(self.Z,X_new, X_variance_new)
|
||||
mu = np.dot(Kx,self.Kmmi_m)
|
||||
Kxx = self.kern.psi0(self.Z,X_new,X_variance_new)
|
||||
psi2 = self.kern.psi2(self.Z,X_new,X_variance_new)
|
||||
diag_var = Kxx - np.sum(np.sum(psi2*tmp[None,:,:],1),1)
|
||||
if full_cov:
|
||||
raise NotImplementedError
|
||||
else:
|
||||
return mu, diag_var[:,None]
|
||||
|
||||
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
|
||||
# normalize X values
|
||||
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
|
||||
if X_variance_new is not None:
|
||||
X_variance_new = X_variance_new / self._Xscale ** 2
|
||||
|
||||
# here's the actual prediction by the GP model
|
||||
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
|
||||
|
||||
# now push through likelihood
|
||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
|
||||
|
||||
return mean, var, _025pm, _975pm
|
||||
|
||||
|
||||
def set_vb_param(self,vb_param):
|
||||
"""set the distribution q(u) from the canonical parameters"""
|
||||
self.q_u_canonical_flat = vb_param.copy()
|
||||
self.q_u_canonical = self.q_u_canonical_flat[:self.num_inducing*self.output_dim].reshape(self.num_inducing,self.output_dim),self.q_u_canonical_flat[self.num_inducing*self.output_dim:].reshape(self.num_inducing,self.num_inducing)
|
||||
|
||||
self.q_u_prec = -2.*self.q_u_canonical[1]
|
||||
self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
|
||||
self.q_u_Li = q_u_Li
|
||||
self.q_u_logdet = -tmp
|
||||
self.q_u_mean, _ = dpotrs(q_u_Li, np.asfortranarray(self.q_u_canonical[0]),lower=1)
|
||||
|
||||
self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.output_dim)
|
||||
|
||||
|
||||
def get_vb_param(self):
|
||||
"""
|
||||
Return the canonical parameters of the distribution q(u)
|
||||
"""
|
||||
return self.q_u_canonical_flat
|
||||
|
||||
|
||||
def plot(self, *args, **kwargs):
|
||||
"""
|
||||
See GPy.plotting.matplot_dep.svig_plots.plot
|
||||
"""
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import svig_plots
|
||||
svig_plots.plot(self,*args,**kwargs)
|
||||
|
||||
|
||||
def plot_traces(self):
|
||||
"""
|
||||
See GPy.plotting.matplot_dep.svig_plots.plot_traces
|
||||
"""
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ..plotting.matplot_dep import svig_plots
|
||||
svig_plots.plot_traces(self)
|
||||
|
|
@@ -4,6 +4,4 @@
|
|||
import classification
|
||||
import regression
|
||||
import dimensionality_reduction
|
||||
import tutorials
|
||||
import stochastic
|
||||
import non_gaussian
|
||||
|
|
|
|||
|
|
@@ -28,13 +28,13 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
|
|||
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing)
|
||||
|
||||
# Constrain all parameters to be positive
|
||||
m.tie_params('.*len')
|
||||
#m.tie_params('.*len')
|
||||
m['.*len'] = 10.
|
||||
m.update_likelihood_approximation()
|
||||
|
||||
# Optimize
|
||||
if optimize:
|
||||
m.optimize(max_iters=max_iters)
|
||||
for _ in range(5):
|
||||
m.optimize(max_iters=int(max_iters/5))
|
||||
print(m)
|
||||
|
||||
#Test
|
||||
|
|
@@ -150,7 +150,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
|
|||
print m
|
||||
return m
|
||||
|
||||
def toy_heaviside(seed=default_seed, optimize=True, plot=True):
|
||||
def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
|
||||
"""
|
||||
Simple 1D classification example using a Heaviside GP transformation
|
||||
|
||||
|
|
@@ -166,16 +166,18 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):
|
|||
Y[Y.flatten() == -1] = 0
|
||||
|
||||
# Model definition
|
||||
noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside())
|
||||
likelihood = GPy.likelihoods.EP(Y, noise_model)
|
||||
m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
|
||||
kernel = GPy.kern.RBF(1)
|
||||
likelihood = GPy.likelihoods.Bernoulli(gp_link=GPy.likelihoods.link_functions.Heaviside())
|
||||
ep = GPy.inference.latent_function_inference.expectation_propagation.EP()
|
||||
m = GPy.core.GP(X=data['X'], Y=Y, kernel=kernel, likelihood=likelihood, inference_method=ep, name='gp_classification_heaviside')
|
||||
#m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
|
||||
|
||||
# Optimize
|
||||
if optimize:
|
||||
m.update_likelihood_approximation()
|
||||
# Parameters optimization:
|
||||
m.optimize()
|
||||
#m.pseudo_EM()
|
||||
for _ in range(5):
|
||||
m.optimize(max_iters=int(max_iters/5))
|
||||
print m
|
||||
|
||||
# Plot
|
||||
if plot:
|
||||
|
|
|
|||
|
|
@@ -59,7 +59,7 @@ def student_t_approx(optimize=True, plot=True):
|
|||
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
laplace_inf = GPy.inference.latent_function_inference.Laplace()
|
||||
m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf)
|
||||
m3['.*t_noise'].constrain_bounded(1e-6, 10.)
|
||||
m3['.*t_scale2'].constrain_bounded(1e-6, 10.)
|
||||
m3['.*white'].constrain_fixed(1e-5)
|
||||
m3.randomize()
|
||||
|
||||
|
|
@@ -67,7 +67,7 @@ def student_t_approx(optimize=True, plot=True):
|
|||
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
|
||||
laplace_inf = GPy.inference.latent_function_inference.Laplace()
|
||||
m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf)
|
||||
m4['.*t_noise'].constrain_bounded(1e-6, 10.)
|
||||
m4['.*t_scale2'].constrain_bounded(1e-6, 10.)
|
||||
m4['.*white'].constrain_fixed(1e-5)
|
||||
m4.randomize()
|
||||
print m4
|
||||
|
|
@@ -124,6 +124,7 @@ def student_t_approx(optimize=True, plot=True):
|
|||
return m1, m2, m3, m4
|
||||
|
||||
def boston_example(optimize=True, plot=True):
|
||||
raise NotImplementedError("Needs updating")
|
||||
import sklearn
|
||||
from sklearn.cross_validation import KFold
|
||||
optimizer='bfgs'
|
||||
|
|
@@ -152,8 +153,8 @@ def boston_example(optimize=True, plot=True):
|
|||
noise = 1e-1 #np.exp(-2)
|
||||
rbf_len = 0.5
|
||||
data_axis_plot = 4
|
||||
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
kernelstu = GPy.kern.RBF(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
kernelgp = GPy.kern.RBF(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
|
||||
|
||||
#Baseline
|
||||
score_folds[0, n] = rmse(Y_test, np.mean(Y_train))
|
||||
|
|
@@ -162,8 +163,8 @@ def boston_example(optimize=True, plot=True):
|
|||
print "Gauss GP"
|
||||
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
|
||||
mgp.constrain_fixed('.*white', 1e-5)
|
||||
mgp['rbf_len'] = rbf_len
|
||||
mgp['noise'] = noise
|
||||
mgp['.*len'] = rbf_len
|
||||
mgp['.*noise'] = noise
|
||||
print mgp
|
||||
if optimize:
|
||||
mgp.optimize(optimizer=optimizer, messages=messages)
|
||||
|
|
@@ -198,9 +199,9 @@ def boston_example(optimize=True, plot=True):
|
|||
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
|
||||
mstu_t.constrain_fixed('.*white', 1e-5)
|
||||
mstu_t.constrain_bounded('.*t_noise', 0.0001, 1000)
|
||||
mstu_t.constrain_bounded('.*t_scale2', 0.0001, 1000)
|
||||
mstu_t['rbf_len'] = rbf_len
|
||||
mstu_t['.*t_noise'] = noise
|
||||
mstu_t['.*t_scale2'] = noise
|
||||
print mstu_t
|
||||
if optimize:
|
||||
mstu_t.optimize(optimizer=optimizer, messages=messages)
|
||||
|
|
|
|||
|
|
@@ -1,40 +0,0 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
try:
|
||||
import pylab as pb
|
||||
except:
|
||||
pass
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
def toy_1d(optimize=True, plot=True):
|
||||
N = 2000
|
||||
M = 20
|
||||
|
||||
#create data
|
||||
X = np.linspace(0,32,N)[:,None]
|
||||
Z = np.linspace(0,32,M)[:,None]
|
||||
Y = np.sin(X) + np.cos(0.3*X) + np.random.randn(*X.shape)/np.sqrt(50.)
|
||||
|
||||
m = GPy.models.SVIGPRegression(X,Y, batchsize=10, Z=Z)
|
||||
m.constrain_bounded('noise_variance',1e-3,1e-1)
|
||||
m.constrain_bounded('white_variance',1e-3,1e-1)
|
||||
|
||||
m.param_steplength = 1e-4
|
||||
|
||||
if plot:
|
||||
fig = pb.figure()
|
||||
ax = fig.add_subplot(111)
|
||||
def cb(foo):
|
||||
ax.cla()
|
||||
m.plot(ax=ax,Z_height=-3)
|
||||
ax.set_ylim(-3,3)
|
||||
fig.canvas.draw()
|
||||
|
||||
if optimize:
|
||||
m.optimize(500, callback=cb, callback_interval=1)
|
||||
|
||||
if plot:
|
||||
m.plot_traces()
|
||||
return m
|
||||
|
|
@@ -1,156 +0,0 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
"""
|
||||
Code of Tutorials
|
||||
"""
|
||||
|
||||
try:
|
||||
import pylab as pb
|
||||
pb.ion()
|
||||
except:
|
||||
pass
|
||||
import numpy as np
|
||||
import GPy
|
||||
|
||||
def tuto_GP_regression(optimize=True, plot=True):
|
||||
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
|
||||
|
||||
X = np.random.uniform(-3.,3.,(20,1))
|
||||
Y = np.sin(X) + np.random.randn(20,1)*0.05
|
||||
|
||||
kernel = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
|
||||
|
||||
m = GPy.models.GPRegression(X, Y, kernel)
|
||||
|
||||
print m
|
||||
if plot:
|
||||
m.plot()
|
||||
|
||||
m.constrain_positive('')
|
||||
|
||||
m.unconstrain('') # may be used to remove the previous constrains
|
||||
m.constrain_positive('.*rbf_variance')
|
||||
m.constrain_bounded('.*lengthscale',1.,10. )
|
||||
m.constrain_fixed('.*noise',0.0025)
|
||||
|
||||
if optimize:
|
||||
m.optimize()
|
||||
m.optimize_restarts(num_restarts = 10)
|
||||
|
||||
#######################################################
|
||||
#######################################################
|
||||
# sample inputs and outputs
|
||||
X = np.random.uniform(-3.,3.,(50,2))
|
||||
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
|
||||
|
||||
# define kernel
|
||||
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
|
||||
|
||||
# create simple GP model
|
||||
m = GPy.models.GPRegression(X, Y, ker)
|
||||
|
||||
# constrain all parameters to be positive
|
||||
m.constrain_positive('')
|
||||
|
||||
# optimize and plot
|
||||
if optimize:
|
||||
m.optimize('tnc', max_f_eval = 1000)
|
||||
if plot:
|
||||
m.plot()
|
||||
|
||||
print m
|
||||
return(m)
|
||||
|
||||
def tuto_kernel_overview(optimize=True, plot=True):
|
||||
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
|
||||
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
|
||||
ker2 = GPy.kern.rbf(input_dim=1, variance = .75, lengthscale=2.)
|
||||
ker3 = GPy.kern.rbf(1, .5, .5)
|
||||
|
||||
print ker2
|
||||
|
||||
if plot:
|
||||
ker1.plot()
|
||||
ker2.plot()
|
||||
ker3.plot()
|
||||
|
||||
k1 = GPy.kern.rbf(1,1.,2.)
|
||||
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
|
||||
|
||||
# Product of kernels
|
||||
k_prod = k1.prod(k2) # By default, tensor=False
|
||||
k_prodtens = k1.prod(k2,tensor=True)
|
||||
|
||||
# Sum of kernels
|
||||
k_add = k1.add(k2) # By default, tensor=False
|
||||
k_addtens = k1.add(k2,tensor=True)
|
||||
|
||||
k1 = GPy.kern.rbf(1,1.,2)
|
||||
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
|
||||
|
||||
k = k1 * k2 # equivalent to k = k1.prod(k2)
|
||||
print k
|
||||
|
||||
# Simulate sample paths
|
||||
X = np.linspace(-5,5,501)[:,None]
|
||||
Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
|
||||
|
||||
k1 = GPy.kern.rbf(1)
|
||||
k2 = GPy.kern.Matern32(1)
|
||||
k3 = GPy.kern.white(1)
|
||||
|
||||
k = k1 + k2 + k3
|
||||
print k
|
||||
|
||||
k.constrain_positive('.*var')
|
||||
k.constrain_fixed(np.array([1]),1.75)
|
||||
k.tie_params('.*len')
|
||||
k.unconstrain('white')
|
||||
k.constrain_bounded('white',lower=1e-5,upper=.5)
|
||||
print k
|
||||
|
||||
k_cst = GPy.kern.bias(1,variance=1.)
|
||||
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
|
||||
Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
|
||||
print Kanova
|
||||
|
||||
# sample inputs and outputs
|
||||
X = np.random.uniform(-3.,3.,(40,2))
|
||||
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
|
||||
|
||||
# Create GP regression model
|
||||
m = GPy.models.GPRegression(X, Y, Kanova)
|
||||
|
||||
if plot:
|
||||
fig = pb.figure(figsize=(5,5))
|
||||
ax = fig.add_subplot(111)
|
||||
m.plot(ax=ax)
|
||||
|
||||
pb.figure(figsize=(20,3))
|
||||
pb.subplots_adjust(wspace=0.5)
|
||||
axs = pb.subplot(1,5,1)
|
||||
m.plot(ax=axs)
|
||||
pb.subplot(1,5,2)
|
||||
pb.ylabel("= ",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,3)
|
||||
m.plot(ax=axs, which_parts=[False,True,False,False])
|
||||
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,4)
|
||||
m.plot(ax=axs, which_parts=[False,False,True,False])
|
||||
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
|
||||
axs = pb.subplot(1,5,5)
|
||||
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
|
||||
m.plot(ax=axs, which_parts=[False,False,False,True])
|
||||
|
||||
return(m)
|
||||
|
||||
|
||||
def model_interaction(optimize=True, plot=True):
|
||||
X = np.random.randn(20,1)
|
||||
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
|
||||
k = GPy.kern.rbf(1) + GPy.kern.bias(1)
|
||||
m = GPy.models.GPRegression(X, Y, kernel=k)
|
||||
return m
|
||||
|
||||
|
|
@@ -7,193 +7,392 @@ The package for the psi statistics computation
|
|||
|
||||
import numpy as np
|
||||
|
||||
def psicomputations(variance, lengthscale, Z, variational_posterior):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi0, psi1 and psi2
|
||||
# Produced intermediate results:
|
||||
# _psi1 NxM
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
try:
|
||||
from scipy import weave
|
||||
|
||||
def _psicomputations(variance, lengthscale, Z, variational_posterior):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi0, psi1 and psi2
|
||||
# Produced intermediate results:
|
||||
# _psi1 NxM
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
|
||||
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
|
||||
l2 = np.square(lengthscale)
|
||||
log_denom1 = np.log(S/l2+1)
|
||||
log_denom2 = np.log(2*S/l2+1)
|
||||
log_gamma = np.log(gamma)
|
||||
log_gamma1 = np.log(1.-gamma)
|
||||
variance = float(variance)
|
||||
psi0 = np.empty(N)
|
||||
psi0[:] = variance
|
||||
psi1 = np.empty((N,M))
|
||||
psi2n = np.empty((N,M,M))
|
||||
|
||||
from ....util.misc import param_to_array
|
||||
S = param_to_array(S)
|
||||
mu = param_to_array(mu)
|
||||
gamma = param_to_array(gamma)
|
||||
Z = param_to_array(Z)
|
||||
|
||||
support_code = """
|
||||
#include <math.h>
|
||||
"""
|
||||
code = """
|
||||
for(int n=0; n<N; n++) {
|
||||
for(int m1=0;m1<M;m1++) {
|
||||
double log_psi1=0;
|
||||
for(int m2=0;m2<=m1;m2++) {
|
||||
double log_psi2_n=0;
|
||||
for(int q=0;q<Q;q++) {
|
||||
double Snq = S(n,q);
|
||||
double lq = l2(q);
|
||||
double Zm1q = Z(m1,q);
|
||||
double Zm2q = Z(m2,q);
|
||||
|
||||
if(m2==0) {
|
||||
// Compute Psi_1
|
||||
double muZ = mu(n,q)-Z(m1,q);
|
||||
|
||||
double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
|
||||
double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
|
||||
log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));
|
||||
}
|
||||
// Compute Psi_2
|
||||
double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
|
||||
double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
|
||||
double dZ = Zm1q - Zm2q;
|
||||
|
||||
double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
|
||||
double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
|
||||
log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
|
||||
}
|
||||
double exp_psi2_n = exp(log_psi2_n);
|
||||
psi2n(n,m1,m2) = variance*variance*exp_psi2_n;
|
||||
if(m1!=m2) { psi2n(n,m2,m1) = variance*variance*exp_psi2_n;}
|
||||
}
|
||||
psi1(n,m1) = variance*exp(log_psi1);
|
||||
}
|
||||
}
|
||||
"""
|
||||
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
|
||||
|
||||
psi2 = psi2n.sum(axis=0)
|
||||
return psi0,psi1,psi2,psi2n
|
||||
|
||||
from GPy.util.caching import Cacher
|
||||
psicomputations = Cacher(_psicomputations, limit=1)
|
||||
|
||||
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
ARD = (len(lengthscale)!=1)
|
||||
|
||||
_,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)
|
||||
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
|
||||
l2 = np.square(lengthscale)
|
||||
log_denom1 = np.log(S/l2+1)
|
||||
log_denom2 = np.log(2*S/l2+1)
|
||||
log_gamma = np.log(gamma)
|
||||
log_gamma1 = np.log(1.-gamma)
|
||||
variance = float(variance)
|
||||
|
||||
dvar = np.zeros(1)
|
||||
dmu = np.zeros((N,Q))
|
||||
dS = np.zeros((N,Q))
|
||||
dgamma = np.zeros((N,Q))
|
||||
dl = np.zeros(Q)
|
||||
dZ = np.zeros((M,Q))
|
||||
dvar += np.sum(dL_dpsi0)
|
||||
|
||||
from ....util.misc import param_to_array
|
||||
S = param_to_array(S)
|
||||
mu = param_to_array(mu)
|
||||
gamma = param_to_array(gamma)
|
||||
Z = param_to_array(Z)
|
||||
|
||||
support_code = """
|
||||
#include <math.h>
|
||||
"""
|
||||
code = """
|
||||
for(int n=0; n<N; n++) {
|
||||
for(int m1=0;m1<M;m1++) {
|
||||
double log_psi1=0;
|
||||
for(int m2=0;m2<M;m2++) {
|
||||
double log_psi2_n=0;
|
||||
for(int q=0;q<Q;q++) {
|
||||
double Snq = S(n,q);
|
||||
double lq = l2(q);
|
||||
double Zm1q = Z(m1,q);
|
||||
double Zm2q = Z(m2,q);
|
||||
double gnq = gamma(n,q);
|
||||
double mu_nq = mu(n,q);
|
||||
|
||||
if(m2==0) {
|
||||
// Compute Psi_1
|
||||
double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
|
||||
if(q==0) {dvar(0) += lpsi1/variance;}
|
||||
|
||||
double Zmu = Zm1q - mu_nq;
|
||||
double denom = Snq+lq;
|
||||
double Zmu2_denom = Zmu*Zmu/denom;
|
||||
|
||||
double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
|
||||
double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
|
||||
double d_exp1,d_exp2;
|
||||
if(exp1>exp2) {
|
||||
d_exp1 = 1.;
|
||||
d_exp2 = exp(exp2-exp1);
|
||||
} else {
|
||||
d_exp1 = exp(exp1-exp2);
|
||||
d_exp2 = 1.;
|
||||
}
|
||||
double exp_sum = d_exp1+d_exp2;
|
||||
|
||||
dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
|
||||
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
|
||||
dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
|
||||
dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
|
||||
dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
|
||||
}
|
||||
// Compute Psi_2
|
||||
double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
|
||||
if(q==0) {dvar(0) += lpsi2*2/variance;}
|
||||
|
||||
double dZm1m2 = Zm1q - Zm2q;
|
||||
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
|
||||
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
|
||||
double denom = 2.*Snq+lq;
|
||||
double muZhat2_denom = muZhat*muZhat/denom;
|
||||
|
||||
double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
|
||||
double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
|
||||
double d_exp1,d_exp2;
|
||||
if(exp1>exp2) {
|
||||
d_exp1 = 1.;
|
||||
d_exp2 = exp(exp2-exp1);
|
||||
} else {
|
||||
d_exp1 = exp(exp1-exp2);
|
||||
d_exp2 = 1.;
|
||||
}
|
||||
double exp_sum = d_exp1+d_exp2;
|
||||
|
||||
dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
|
||||
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
|
||||
dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
|
||||
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
|
||||
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
|
||||
|
||||
dl *= 2.*lengthscale
|
||||
if not ARD:
|
||||
dl = dl.sum()
|
||||
|
||||
return dvar, dl, dZ, dmu, dS, dgamma
|
||||
|
||||
except:
|
||||
|
||||
def psicomputations(variance, lengthscale, Z, variational_posterior):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi0, psi1 and psi2
|
||||
# Produced intermediate results:
|
||||
# _psi1 NxM
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
|
||||
psi0 = np.empty(mu.shape[0])
|
||||
psi0[:] = variance
|
||||
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
|
||||
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
|
||||
return psi0, psi1, psi2
|
||||
|
||||
psi0 = np.empty(mu.shape[0])
|
||||
psi0[:] = variance
|
||||
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
|
||||
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
|
||||
return psi0, psi1, psi2
|
||||
|
||||
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
|
||||
"""
|
||||
Z - MxQ
|
||||
mu - NxQ
|
||||
S - NxQ
|
||||
gamma - NxQ
|
||||
"""
|
||||
# here are the "statistics" for psi1
|
||||
# Produced intermediate results:
|
||||
# _psi1 NxM
|
||||
|
||||
lengthscale2 = np.square(lengthscale)
|
||||
|
||||
# psi1
|
||||
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
|
||||
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
|
||||
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
|
||||
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
|
||||
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
|
||||
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
|
||||
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
|
||||
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
|
||||
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
|
||||
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
|
||||
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
|
||||
|
||||
return _psi1
|
||||
|
||||
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
    """
    Z - MxQ
    mu - NxQ
    S - NxQ
    gamma - NxQ
    """
    # here are the "statistics" for psi2
    # Produced intermediate results:
    # _psi2    MxM

    lengthscale2 = np.square(lengthscale)

    _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
    _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
    _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
    _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ

    # psi2
    _psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
    _psi2_denom_sqrt = np.sqrt(_psi2_denom)
    _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
    _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
    _psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
    _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
    _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
    _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
    _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
    _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
    _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM

    return _psi2

def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
    ARD = (len(lengthscale)!=1)

    dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
    dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)

    dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2

    dL_dlengscale = dl_psi1 + dl_psi2
    if not ARD:
        dL_dlengscale = dL_dlengscale.sum()

    dL_dgamma = dgamma_psi1 + dgamma_psi2
    dL_dmu = dmu_psi1 + dmu_psi2
    dL_dS = dS_psi1 + dS_psi2
    dL_dZ = dZ_psi1 + dZ_psi2

    return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma

def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
    """
    dL_dpsi1 - NxM
    Z - MxQ
    mu - NxQ
    S - NxQ
    gamma - NxQ
    """
    # here are the "statistics" for psi1
    # Produced intermediate results: dL_dparams w.r.t. psi1
    # _dL_dvariance     1
    # _dL_dlengthscale  Q
    # _dL_dZ            MxQ
    # _dL_dgamma        NxQ
    # _dL_dmu           NxQ
    # _dL_dS            NxQ

    lengthscale2 = np.square(lengthscale)

    # psi1
    _psi1_denom = S / lengthscale2 + 1. # NxQ
    _psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
    _psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
    _psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
    _psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
    _psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
    _psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
    _psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
    _psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
    _psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
    _psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
    _psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
    _psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
    _psi1 = variance * np.exp(_psi1_exp_sum) # NxM
    _dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
    _dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
    _dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
    _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
    _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
    _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))

    return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
    """
    Z - MxQ
    mu - NxQ
    S - NxQ
    gamma - NxQ
    dL_dpsi2 - MxM
    """
    # here are the "statistics" for psi2
    # Produced the derivatives w.r.t. psi2:
    # _dL_dvariance     1
    # _dL_dlengthscale  Q
    # _dL_dZ            MxQ
    # _dL_dgamma        NxQ
    # _dL_dmu           NxQ
    # _dL_dS            NxQ

    lengthscale2 = np.square(lengthscale)

    _psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
    _psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
    _psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
    _psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ

    # psi2
    _psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
    _psi2_denom_sqrt = np.sqrt(_psi2_denom)
    _psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
    _psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
    _psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
    _psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
    _psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
    _psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
    _psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
    _psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
    _psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
    _psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
    _psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
    _psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
    _dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
    _dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
    _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
    _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
    _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
    _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))

    return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
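
The psi0/psi1/psi2 terms computed above are the kernel expectations under the variational posterior over the latent inputs that the sparse-GP bound needs. The following is only a rough stand-alone sketch of how these routines could be exercised, assuming the functions above are in scope; the SimpleNamespace object, the shapes and all the values are made up for illustration, with mean/variance/binary_prob named after the attributes the code expects.

# Illustrative driver only: assumes psicomputations / psiDerivativecomputations
# from above are importable in the current namespace.
# Shapes follow the docstrings (Z: MxQ, mu/S/gamma: NxQ).
import numpy as np
from types import SimpleNamespace

N, M, Q = 5, 3, 2
rng = np.random.RandomState(0)
qX = SimpleNamespace(mean=rng.randn(N, Q),                       # posterior means
                     variance=rng.rand(N, Q),                    # posterior variances
                     binary_prob=rng.uniform(0.1, 0.9, (N, Q)))  # spike-and-slab switch probabilities
Z = rng.randn(M, Q)                                              # inducing inputs
variance, lengthscale = 1.0, np.ones(Q)                          # kernel variance and ARD lengthscales

psi0, psi1, psi2 = psicomputations(variance, lengthscale, Z, qX)
# psi0: (N,), psi1: (N, M), psi2: (M, M)

# Gradients of some objective w.r.t. the psi statistics propagate back through:
dL_dpsi0, dL_dpsi1, dL_dpsi2 = np.ones(N), np.ones((N, M)), np.ones((M, M))
dvar, dl, dZ, dmu, dS, dgamma = psiDerivativecomputations(
    dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, qX)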
@ -4,7 +4,6 @@
from gp_regression import GPRegression
from gp_classification import GPClassification
from sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput
from svigp_regression import SVIGPRegression
from sparse_gp_classification import SparseGPClassification
from gplvm import GPLVM
from bcgplvm import BCGPLVM

@ -25,8 +25,6 @@ class BayesianGPLVM(SparseGP_MPI):
Z=None, kernel=None, inference_method=None, likelihood=None,
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
self.mpi_comm = mpi_comm
self.__IN_OPTIMIZATION__ = False

self.logger = logging.getLogger(self.__class__.__name__)
if X is None:

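The mpi_comm argument added here is exercised by the MPI tests later in this diff. A rough usage sketch, assuming mpi4py is installed and the script is launched with mpirun; the toy data is made up:

# Sketch based on the BayesianGPLVM MPI test further down; not part of the diff itself.
import numpy as np
import GPy
from mpi4py import MPI

comm = MPI.COMM_WORLD
np.random.seed(0)
Y = np.random.randn(100, 5)        # toy observations, generated identically on every rank
comm.Bcast(Y)                      # broadcast from rank 0 so all ranks share the same data
m = GPy.models.BayesianGPLVM(Y, 1, mpi_comm=comm)   # 1-d latent space, parallel inference
m.optimize(max_iters=10)
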
@ -9,6 +9,7 @@ from .. import likelihoods
from .. import kern
from ..inference.latent_function_inference import VarDTC
from ..core.parameterization.variational import NormalPosterior
from GPy.inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch

class SparseGPRegression(SparseGP_MPI):
    """
@ -30,7 +31,7 @@ class SparseGPRegression(SparseGP_MPI):

"""

def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None):
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None):
num_data, input_dim = X.shape

# kern defaults to rbf (plus white for stability)

@ -48,8 +49,14 @@ class SparseGPRegression(SparseGP_MPI):

if not (X_variance is None):
    X = NormalPosterior(X,X_variance)

if mpi_comm is not None:
    from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
    infr = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
    infr = VarDTC()

SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC(), normalizer=normalizer)
SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm)

def parameters_changed(self):
    from ..inference.latent_function_inference.var_dtc_parallel import update_gradients_sparsegp,VarDTC_minibatch

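With this change SparseGPRegression picks VarDTC_minibatch when an MPI communicator is passed and falls back to the serial VarDTC otherwise. A rough sketch of the resulting call pattern, mirroring test_SparseGPRegression_MPI further down (assumes mpi4py and an mpirun launch; the toy data is made up):

# Sketch only; mirrors the MPI regression test later in this diff.
import numpy as np
import GPy
from mpi4py import MPI

comm = MPI.COMM_WORLD
np.random.seed(123456)
x = np.linspace(-6., 6., 100)
y = np.sin(x) + np.random.randn(100) * 0.05
comm.Bcast(y)                                     # all ranks must see the same data
m = GPy.models.SparseGPRegression(x[:, None], y[:, None], mpi_comm=comm)   # parallel VarDTC_minibatch
m.optimize(max_iters=10)

m_serial = GPy.models.SparseGPRegression(x[:, None], y[:, None])           # default serial VarDTC
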
@ -1,45 +0,0 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from ..core import SVIGP
from .. import likelihoods
from .. import kern

class SVIGPRegression(SVIGP):
    """
    Gaussian Process model for regression

    This is a thin wrapper around the SVIGP class, with a set of sensible defaults

    :param X: input observations
    :param Y: observed values
    :param kernel: a GPy kernel, defaults to rbf+white
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :param normalize_Y: whether to normalize the output data before computing (predictions will be in original scales)
    :type normalize_Y: False|True
    :rtype: model object

    .. Note:: Multiple independent outputs are allowed using columns of Y

    """

    def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10, normalize_Y=False):
        # kern defaults to rbf (plus white for stability)
        if kernel is None:
            kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3)

        # Z defaults to a subset of the data
        if Z is None:
            i = np.random.permutation(X.shape[0])[:num_inducing]
            Z = X[i].copy()
        else:
            assert Z.shape[1] == X.shape[1]

        # likelihood defaults to Gaussian
        likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)

        SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
        self.load_batch()

@ -6,7 +6,6 @@ import models_plots
import priors_plots
import variational_plots
import kernel_plots
import svig_plots
import dim_reduction_plots
import mapping_plots
import Tango

@ -21,6 +21,7 @@ comm = MPI.COMM_WORLD
N = 100
x = np.linspace(-6., 6., N)
y = np.sin(x) + np.random.randn(N) * 0.05
comm.Bcast(y)
data = np.vstack([x,y])
infr = GPy.inference.latent_function_inference.VarDTC_minibatch(mpi_comm=comm)
m = GPy.models.BayesianGPLVM(data.T,1,mpi_comm=comm)

@ -39,9 +40,43 @@ if comm.rank==0:
(stdout, stderr) = p.communicate()
L1 = float(stdout.splitlines()[-2])
L2 = float(stdout.splitlines()[-1])
self.assertAlmostEqual(L1, L2)
self.assertTrue(np.allclose(L1,L2))
import os
os.remove('mpi_test__.py')

def test_SparseGPRegression_MPI(self):
code = """
import numpy as np
import GPy
from mpi4py import MPI
np.random.seed(123456)
comm = MPI.COMM_WORLD
N = 100
x = np.linspace(-6., 6., N)
y = np.sin(x) + np.random.randn(N) * 0.05
comm.Bcast(y)
data = np.vstack([x,y])
#infr = GPy.inference.latent_function_inference.VarDTC_minibatch(mpi_comm=comm)
m = GPy.models.SparseGPRegression(data[:1].T,data[1:2].T,mpi_comm=comm)
m.optimize(max_iters=10)
if comm.rank==0:
print float(m.objective_function())
m.inference_method.mpi_comm=None
m.mpi_comm=None
m._trigger_params_changed()
print float(m.objective_function())
"""
with open('mpi_test__.py','w') as f:
f.write(code)
f.close()
p = subprocess.Popen('mpirun -n 4 python mpi_test__.py',stdout=subprocess.PIPE,shell=True)
(stdout, stderr) = p.communicate()
L1 = float(stdout.splitlines()[-2])
L2 = float(stdout.splitlines()[-1])
self.assertTrue(np.allclose(L1,L2))
import os
os.remove('mpi_test__.py')


except:
pass

@ -5,23 +5,28 @@

Welcome to GPy's documentation!
===============================
For a quick start, you can have a look at one of the tutorials:

* `Basic Gaussian process regression <tuto_GP_regression.html>`_
* `Interacting with models <tuto_interacting_with_models.html>`_
* `A kernel overview <tuto_kernel_overview.html>`_
* `Writing new kernels <tuto_creating_new_kernels.html>`_
* `Writing new models <tuto_creating_new_models.html>`_
* `Parameterization handles <tuto_parameterized.html>`_

`GPy <http://sheffieldml.github.io/GPy/>`_ is a Gaussian Process (GP) framework written in Python, from the Sheffield machine learning group.

You may also be interested in some examples in the GPy/examples folder.
The `GPy homepage <http://sheffieldml.github.io/GPy/>`_ contains tutorials for users and further information on the project, including installation instructions.
This documentation is mostly aimed at developers interacting closely with the code-base.

The code can be found on our `Github project page <https://github.com/SheffieldML/GPy>`_. It is open source and provided under the BSD license.

.. * `Basic Gaussian process regression <tuto_GP_regression.html>`_
.. * `Interacting with models <tuto_interacting_with_models.html>`_
.. * `A kernel overview <tuto_kernel_overview.html>`_
.. * `Writing new kernels <tuto_creating_new_kernels.html>`_
.. * `Writing new models <tuto_creating_new_models.html>`_
.. * `Parameterization handles <tuto_parameterized.html>`_

.. You may also be interested in some examples in the GPy/examples folder.

Contents:

.. toctree::
   :maxdepth: 2

   installation
   GPy

30
doc/log.txt
30
doc/log.txt
/home/alans/Work/GPy/GPy/__init__.py:docstring of GPy.load:1: WARNING: Inline interpreted text or phrase reference start-string without end-string.
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP:7: WARNING: Field list ends without a blank line; unexpected unindent.
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP:10: ERROR: Unexpected indentation.
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP.optimize:8: ERROR: Unknown interpreted text role "module".
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP.predictive_gradients:5: ERROR: Unexpected indentation.
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP.predictive_gradients:8: WARNING: Block quote ends without a blank line; unexpected unindent.
/home/alans/Work/GPy/GPy/core/model.py:docstring of GPy.core.model.Model.optimize_restarts:29: WARNING: Explicit markup ends without a blank line; unexpected unindent.
/home/alans/Work/GPy/doc/GPy.core.rst:65: WARNING: autodoc: failed to import module u'GPy.core.symbolic'; the following exception was raised:
Traceback (most recent call last):

@ -23,31 +23,6 @@ ImportError: No module named lambdify
/home/alans/Work/GPy/GPy/core/parameterization/ties_and_remappings.py:docstring of GPy.core.parameterization.ties_and_remappings.Tie:18: SEVERE: Unexpected section title or transition.

================================
/home/alans/Work/GPy/doc/GPy.examples.rst:18: WARNING: autodoc: failed to import module u'GPy.examples.coreg_example'; the following exception was raised:
Traceback (most recent call last):
File "/home/alans/anaconda/envs/GPy/lib/python2.7/site-packages/sphinx/ext/autodoc.py", line 335, in import_object
__import__(self.modname)
File "/home/alans/Work/GPy/GPy/examples/coreg_example.py", line 44, in <module>
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], Z_list = [Z1], kernel=kern)
File "/home/alans/Work/GPy/GPy/core/parameterization/parameterized.py", line 19, in __call__
self = super(ParametersChangedMeta, self).__call__(*args, **kw)
File "/home/alans/Work/GPy/GPy/models/sparse_gp_coregionalized_regression.py", line 66, in __init__
self['.*inducing'][:,-1].fix()
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 444, in constrain_fixed
self.notify_observers(self, None if trigger_parent else -np.inf)
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 148, in notify_observers
[callble(self, which=which) for _, _, callble in self.observers]
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 1077, in _pass_through_notify_observers
self.notify_observers(which=which)
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 148, in notify_observers
[callble(self, which=which) for _, _, callble in self.observers]
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 1075, in _parameters_changed_notification
self.parameters_changed()
File "/home/alans/Work/GPy/GPy/core/sparse_gp.py", line 66, in parameters_changed
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
File "/home/alans/Work/GPy/GPy/inference/latent_function_inference/var_dtc.py", line 170, in inference
import ipdb; ipdb.set_trace()
ImportError: No module named ipdb
/home/alans/Work/GPy/GPy/kern/_src/coregionalize.py:docstring of GPy.kern._src.coregionalize.Coregionalize:5: ERROR: Unexpected indentation.
/home/alans/Work/GPy/doc/GPy.kern._src.rst:73: WARNING: autodoc: failed to import module u'GPy.kern._src.hierarchical'; the following exception was raised:
Traceback (most recent call last):

@ -187,6 +162,7 @@ Parameter handles

:py:class:`~GPy.core.parameterization.param.Param`
===========
/home/alans/Work/GPy/doc/installation.rst:: WARNING: document isn't included in any toctree
/home/alans/Work/GPy/doc/kernel_implementation.rst:: WARNING: document isn't included in any toctree
/home/alans/Work/GPy/doc/modules.rst:: WARNING: document isn't included in any toctree
/home/alans/Work/GPy/doc/tuto_GP_regression.rst:: WARNING: document isn't included in any toctree