[updates] starting to extract out standalone modules

This commit is contained in:
Max Zwiessele 2014-11-11 10:21:58 +00:00
commit 9b2e49c949
19 changed files with 563 additions and 958 deletions

View file

@ -8,5 +8,4 @@ from parameterization.observable_array import ObsAr
from gp import GP from gp import GP
from sparse_gp import SparseGP from sparse_gp import SparseGP
from svigp import SVIGP
from mapping import * from mapping import *

View file

@ -3,16 +3,12 @@
import numpy as np import numpy as np
import sys import sys
import warnings
from .. import kern from .. import kern
from ..util.linalg import dtrtrs
from model import Model from model import Model
from parameterization import ObsAr from parameterization import ObsAr
from .. import likelihoods from .. import likelihoods
from ..likelihoods.gaussian import Gaussian from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation, LatentFunctionInference
from parameterization.variational import VariationalPosterior from parameterization.variational import VariationalPosterior
from scipy.sparse.base import issparse
import logging import logging
from GPy.util.normalizer import MeanNorm from GPy.util.normalizer import MeanNorm
@ -26,7 +22,7 @@ class GP(Model):
:param Y: output observations :param Y: output observations
:param kernel: a GPy kernel, defaults to rbf+white :param kernel: a GPy kernel, defaults to rbf+white
:param likelihood: a GPy likelihood :param likelihood: a GPy likelihood
:param :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference_method: The inference method to use for this GP :param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
:rtype: model object :rtype: model object
:param Norm normalizer: :param Norm normalizer:
normalize the outputs Y. normalize the outputs Y.
@ -95,10 +91,13 @@ class GP(Model):
def set_XY(self, X=None, Y=None): def set_XY(self, X=None, Y=None):
""" """
Set the input / output of the model Set the input / output data of the model
This is useful if we wish to change our existing data but maintain the same model
:param X: input observations :param X: input observations
:type X: np.ndarray
:param Y: output observations :param Y: output observations
:type Y: np.ndarray
""" """
self.update_model(False) self.update_model(False)
if Y is not None: if Y is not None:
@ -112,7 +111,6 @@ class GP(Model):
if X is not None: if X is not None:
if self.X in self.parameters: if self.X in self.parameters:
# LVM models # LVM models
from ..core.parameterization.variational import VariationalPosterior
if isinstance(self.X, VariationalPosterior): if isinstance(self.X, VariationalPosterior):
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!" assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
self.unlink_parameter(self.X) self.unlink_parameter(self.X)
@ -129,22 +127,39 @@ class GP(Model):
def set_X(self,X): def set_X(self,X):
""" """
Set the input of the model Set the input data of the model
:param X: input observations
:type X: np.ndarray
""" """
self.set_XY(X=X) self.set_XY(X=X)
def set_Y(self,Y): def set_Y(self,Y):
""" """
Set the input of the model Set the output data of the model
:param X: output observations
:type X: np.ndarray
""" """
self.set_XY(Y=Y) self.set_XY(Y=Y)
def parameters_changed(self): def parameters_changed(self):
"""
Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood and gradients of the model
.. warning::
This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
this method yourself, there may be unexpected consequences.
"""
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X) self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
def log_likelihood(self): def log_likelihood(self):
"""
The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
"""
return self._log_marginal_likelihood return self._log_marginal_likelihood
def _raw_predict(self, _Xnew, full_cov=False, kern=None): def _raw_predict(self, _Xnew, full_cov=False, kern=None):
@ -155,12 +170,10 @@ class GP(Model):
of the prediction is computed. If full_cov is False (default), only the of the prediction is computed. If full_cov is False (default), only the
diagonal of the covariance is returned. diagonal of the covariance is returned.
$$ .. math::
p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
= N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*} = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance} \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
""" """
if kern is None: if kern is None:
kern = self.kern kern = self.kern
@ -185,23 +198,20 @@ class GP(Model):
Predict the function(s) at the new point(s) Xnew. Predict the function(s) at the new point(s) Xnew.
:param Xnew: The points at which to make a prediction :param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim :type Xnew: np.ndarray (Nnew x self.input_dim)
:param full_cov: whether to return the full covariance matrix, or just :param full_cov: whether to return the full covariance matrix, or just
the diagonal the diagonal
:type full_cov: bool :type full_cov: bool
:param Y_metadata: metadata about the predicting point to pass to the likelihood :param Y_metadata: metadata about the predicting point to pass to the likelihood
:param kern: The kernel to use for prediction (defaults to the model :param kern: The kernel to use for prediction (defaults to the model
kern). this is useful for examining e.g. subprocesses. kern). this is useful for examining e.g. subprocesses.
:returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim :returns: (mean, var, lower_upper):
:returns: var: posterior variance, a Numpy array, Nnew x 1 if mean: posterior mean, a Numpy array, Nnew x self.input_dim
full_cov=False, Nnew x Nnew otherwise var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:returns: lower and upper boundaries of the 95% confidence intervals, lower_upper: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
Numpy arrays, Nnew x self.input_dim
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions. This is to allow for different normalizations of the output dimensions.
""" """
#predict the latent function values #predict the latent function values
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern) mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
@ -213,6 +223,16 @@ class GP(Model):
return mean, var return mean, var
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None): def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
"""
Get the predictive quantiles around the prediction at X
:param X: The points at which to make a prediction
:type X: np.ndarray (Xnew x self.input_dim)
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
:type quantiles: tuple
:returns: list of quantiles for each X and predictive quantiles for interval combination
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
"""
m, v = self._raw_predict(X, full_cov=False) m, v = self._raw_predict(X, full_cov=False)
if self.normalizer is not None: if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
@ -225,7 +245,12 @@ class GP(Model):
Given a set of points at which to predict X* (size [N*,Q]), compute the Given a set of points at which to predict X* (size [N*,Q]), compute the
derivatives of the mean and variance. Resulting arrays are sized: derivatives of the mean and variance. Resulting arrays are sized:
dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one). dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
dv_dX* -- [N*, Q], (since all outputs have the same variance) dv_dX* -- [N*, Q], (since all outputs have the same variance)
:param X: The points at which to get the predictive gradients
:type X: np.ndarray (Xnew x self.input_dim)
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
""" """
dmu_dX = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim)) dmu_dX = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
@ -245,12 +270,13 @@ class GP(Model):
Samples the posterior GP at the points X. Samples the posterior GP at the points X.
:param X: The points at which to take the samples. :param X: The points at which to take the samples.
:type X: np.ndarray, Nnew x self.input_dim. :type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples. :param size: the number of a posteriori samples.
:type size: int. :type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal. :param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool. :type full_cov: bool.
:returns: Ysim: set of simulations, a Numpy array (N x samples). :returns: Ysim: set of simulations
:rtype: np.ndarray (N x samples)
""" """
m, v = self._raw_predict(X, full_cov=full_cov) m, v = self._raw_predict(X, full_cov=full_cov)
if self.normalizer is not None: if self.normalizer is not None:
@ -268,7 +294,7 @@ class GP(Model):
Samples the posterior GP at the points X. Samples the posterior GP at the points X.
:param X: the points at which to take the samples. :param X: the points at which to take the samples.
:type X: np.ndarray, Nnew x self.input_dim. :type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples. :param size: the number of a posteriori samples.
:type size: int. :type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal. :param full_cov: whether to return the full covariance matrix, or just the diagonal.
@ -292,6 +318,37 @@ class GP(Model):
This is a call to plot with plot_raw=True. This is a call to plot with plot_raw=True.
Data will not be plotted in this, as the GP's view of the world Data will not be plotted in this, as the GP's view of the world
may live in another space, or units then the data. may live in another space, or units then the data.
Can plot only part of the data and part of the posterior functions
using which_data_rowsm which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param Y_metadata: additional data associated with Y which may be needed
:type Y_metadata: dict
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
""" """
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots from ..plotting.matplot_dep import models_plots
@ -325,12 +382,13 @@ class GP(Model):
:param which_data_rows: which of the training data to plot (default all) :param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these :param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_rows: 'all' or a list of integers :type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v. :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples :type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int :type resolution: int
:param levels: number of levels to plot in a contour plot. :param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:type levels: int :type levels: int
:param samples: the number of a posteriori samples to plot :param samples: the number of a posteriori samples to plot
:type samples: int :type samples: int
@ -338,11 +396,14 @@ class GP(Model):
:type fignum: figure number :type fignum: figure number
:param ax: axes to plot on. :param ax: axes to plot on.
:type ax: axes handle :type ax: axes handle
:type output: integer (first output is 0)
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']] :param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol: :type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param fillcol: color of fill [Tango.colorsHex['lightBlue']] :param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure :type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param Y_metadata: additional data associated with Y which may be needed
:type Y_metadata: dict
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
""" """
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots from ..plotting.matplot_dep import models_plots
@ -372,10 +433,8 @@ class GP(Model):
:type max_f_eval: int :type max_f_eval: int
:messages: whether to display during optimisation :messages: whether to display during optimisation
:type messages: bool :type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred optimizer) :param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
:type optimizer: string :type optimizer: string
TODO: valid args
""" """
self.inference_method.on_optimization_start() self.inference_method.on_optimization_start()
try: try:
@ -394,7 +453,11 @@ class GP(Model):
:param optimize: whether to optimize the location of new X (True by default) :param optimize: whether to optimize the location of new X (True by default)
:type optimize: boolean :type optimize: boolean
:return: a tuple containing the posterior estimation of X and the model that optimize X :return: a tuple containing the posterior estimation of X and the model that optimize X
<<<<<<< HEAD
:rtype: (GPy.core.parameterization.variational.VariationalPosterior or numpy.ndarray, GPy.core.Model) :rtype: (GPy.core.parameterization.variational.VariationalPosterior or numpy.ndarray, GPy.core.Model)
=======
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
>>>>>>> 22d30d9d39c70f806fe5bcb815cce9c8eb0f8dca
""" """
from ..inference.latent_function_inference.inferenceX import infer_newX from ..inference.latent_function_inference.inferenceX import infer_newX
return infer_newX(self, Y_new, optimize=optimize) return infer_newX(self, Y_new, optimize=optimize)

View file

@ -234,9 +234,9 @@ class Model(Parameterized):
""" """
if self.is_fixed: if self.is_fixed:
raise RuntimeError, "Cannot optimize, when everything is fixed" print 'nothing to optimize'
if self.size == 0: if self.size == 0:
raise RuntimeError, "Model without parameters cannot be optimized" print 'nothing to optimize'
if start == None: if start == None:
start = self.optimizer_array start = self.optimizer_array

View file

@ -60,7 +60,6 @@ class SparseGP_MPI(SparseGP):
mpi_comm.Bcast(self.param_array, root=0) mpi_comm.Bcast(self.param_array, root=0)
self.update_model(True) self.update_model(True)
def __getstate__(self): def __getstate__(self):
dc = super(SparseGP_MPI, self).__getstate__() dc = super(SparseGP_MPI, self).__getstate__()
dc['mpi_comm'] = None dc['mpi_comm'] = None

View file

@ -1,434 +0,0 @@
# Copyright (c) 2012, James Hensman and Nicolo' Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
from gp import GP
import time
import sys
class SVIGP(GP):
"""
Stochastic Variational inference in a Gaussian Process
:param X: inputs
:type X: np.ndarray (N x Q)
:param Y: observed data
:type Y: np.ndarray of observations (N x D)
:param batchsize: the size of a h
Additional kwargs are used as for a sparse GP. They include:
:param q_u: canonical parameters of the distribution sqehd into a 1D array
:type q_u: np.ndarray
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param kernel: the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param X_uncertainty: The uncertainty in the measurements of X (Gaussian variance)
:type X_uncertainty: np.ndarray (N x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param beta: noise precision. TODO: ignore beta if doing EP
:type beta: float
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, Y, kernel, Z, q_u=None, batchsize=10):
raise NotImplementedError, "This is a work in progress, see github issue "
GP.__init__(self, X, Y, kernel)
self.batchsize=batchsize
self.Z = Z
self.num_inducing = Z.shape[0]
self.batchcounter = 0
self.epochs = 0
self.iterations = 0
self.vb_steplength = 0.05
self.param_steplength = 1e-5
self.momentum = 0.9
if q_u is None:
q_u = np.hstack((np.random.randn(self.num_inducing*self.output_dim),-.5*np.eye(self.num_inducing).flatten()))
self.set_vb_param(q_u)
self._permutation = np.random.permutation(self.num_data)
self.load_batch()
self._param_trace = []
self._ll_trace = []
self._grad_trace = []
#set the adaptive steplength parameters
#self.hbar_t = 0.0
#self.tau_t = 100.0
#self.gbar_t = 0.0
#self.gbar_t1 = 0.0
#self.gbar_t2 = 0.0
#self.hbar_tp = 0.0
#self.tau_tp = 10000.0
#self.gbar_tp = 0.0
#self.adapt_param_steplength = True
#self.adapt_vb_steplength = True
#self._param_steplength_trace = []
#self._vb_steplength_trace = []
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
self.psi0 = self.kern.Kdiag(self.X_batch)
self.psi1 = self.kern.K(self.X_batch, self.Z)
self.psi2 = None
def dL_dtheta(self):
dL_dtheta = self.kern._param_grad_helper(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
else:
dL_dtheta += self.kern._param_grad_helper(self.dL_dpsi1, self.X_batch, self.Z)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
return dL_dtheta
def _set_params(self, p, computations=True):
self.kern._set_params_transformed(p[:self.kern.num_params])
self.likelihood._set_params(p[self.kern.num_params:])
if computations:
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
return np.hstack((self.kern._get_params_transformed() , self.likelihood._get_params()))
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def load_batch(self):
"""
load a batch of data (set self.X_batch and self.Y_batch from self.X, self.Y)
"""
#if we've seen all the data, start again with them in a new random order
if self.batchcounter+self.batchsize > self.num_data:
self.batchcounter = 0
self.epochs += 1
self._permutation = np.random.permutation(self.num_data)
this_perm = self._permutation[self.batchcounter:self.batchcounter+self.batchsize]
self.X_batch = self.X[this_perm]
self.Y_batch = self.Y[this_perm]
self.batchcounter += self.batchsize
self.data_prop = float(self.batchsize)/self.num_data
self._compute_kernel_matrices()
self._computations()
def _computations(self,do_Kmm=True, do_Kmm_grad=True):
"""
All of the computations needed. Some are optional, see kwargs.
"""
if do_Kmm:
self.Lm = jitchol(self.Kmm)
# The rather complex computations of self.A
if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.batchsize, 1, 1))).sum(0)
else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
evals, evecs = np.linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
tmp = evecs * np.sqrt(clipped_evals)
else:
if self.likelihood.is_heteroscedastic:
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.batchsize)))
else:
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision))
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
self.V = self.likelihood.precision*self.likelihood.Y
self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
self.psi1V = np.dot(self.psi1.T, self.V)
self.B = np.eye(self.num_inducing)*self.data_prop + self.A
self.Lambda = backsub_both_sides(self.Lm, self.B.T)
self.LQL = backsub_both_sides(self.Lm,self.q_u_expectation[1].T,transpose='right')
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.likelihood.precision
self.Kmmi_m, _ = dpotrs(self.Lm, self.q_u_expectation[0], lower=1)
self.projected_mean = np.dot(self.psi1,self.Kmmi_m)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.output_dim * self.likelihood.precision * np.ones(self.batchsize)
self.dL_dpsi1, _ = dpotrs(self.Lm,np.asfortranarray(self.VmT.T),lower=1)
self.dL_dpsi1 = self.dL_dpsi1.T
dL_dpsi2 = -0.5 * self.likelihood.precision * backsub_both_sides(self.Lm, self.LQL - self.output_dim * np.eye(self.num_inducing))
if self.has_uncertain_inputs:
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.batchsize,axis=0)
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1.T).T
self.dL_dpsi2 = None
# Compute dL_dKmm
if do_Kmm_grad:
tmp = np.dot(self.LQL,self.A) - backsub_both_sides(self.Lm,np.dot(self.q_u_expectation[0],self.psi1V.T),transpose='right')
tmp += tmp.T
tmp += -self.output_dim*self.B
tmp += self.data_prop*self.LQL
self.dL_dKmm = 0.5*backsub_both_sides(self.Lm,tmp)
#Compute the gradient of the log likelihood wrt noise variance
self.partial_for_likelihood = -0.5*(self.batchsize*self.output_dim - np.sum(self.A*self.LQL))*self.likelihood.precision
self.partial_for_likelihood += (0.5*self.output_dim*self.trace_K + 0.5 * self.likelihood.trYYT - np.sum(self.likelihood.Y*self.projected_mean))*self.likelihood.precision**2
def log_likelihood(self):
"""
As for uncollapsed sparse GP, but account for the proportion of data we're looking at right now.
NB. self.batchsize is the size of the batch, not the size of X_all
"""
assert not self.likelihood.is_heteroscedastic
A = -0.5*self.batchsize*self.output_dim*(np.log(2.*np.pi) - np.log(self.likelihood.precision))
B = -0.5*self.likelihood.precision*self.output_dim*self.trace_K
Kmm_logdet = 2.*np.sum(np.log(np.diag(self.Lm)))
C = -0.5*self.output_dim*self.data_prop*(Kmm_logdet-self.q_u_logdet - self.num_inducing)
C += -0.5*np.sum(self.LQL * self.B)
D = -0.5*self.likelihood.precision*self.likelihood.trYYT
E = np.sum(self.V*self.projected_mean)
return (A+B+C+D+E)/self.data_prop
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))/self.data_prop
def vb_grad_natgrad(self):
"""
Compute the gradients of the lower bound wrt the canonical and
Expectation parameters of u.
Note that the natural gradient in either is given by the gradient in
the other (See Hensman et al 2012 Fast Variational inference in the
conjugate exponential Family)
"""
# Gradient for eta
dL_dmmT_S = -0.5*self.Lambda/self.data_prop + 0.5*self.q_u_prec
Kmmipsi1V,_ = dpotrs(self.Lm,self.psi1V,lower=1)
dL_dm = (Kmmipsi1V - np.dot(self.Lambda,self.q_u_mean))/self.data_prop
# Gradients for theta
S = self.q_u_cov
Si = self.q_u_prec
m = self.q_u_mean
dL_dSi = -mdot(S,dL_dmmT_S, S)
dL_dmhSi = -2*dL_dSi
dL_dSim = np.dot(dL_dSi,m) + np.dot(Si, dL_dm)
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) , np.hstack((dL_dSim.flatten(), dL_dmhSi.flatten()))
def optimize(self, iterations, print_interval=10, callback=lambda:None, callback_interval=5):
param_step = 0.
#Iterate!
for i in range(iterations):
#store the current configuration for plotting later
self._param_trace.append(self._get_params())
self._ll_trace.append(self.log_likelihood() + self.log_prior())
#load a batch
self.load_batch()
#compute the (stochastic) gradient
natgrads = self.vb_grad_natgrad()
grads = self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
self._grad_trace.append(grads)
#compute the steps in all parameters
vb_step = self.vb_steplength*natgrads[0]
if (self.epochs>=1):#only move the parameters after the first epoch
param_step = self.momentum*param_step + self.param_steplength*grads
else:
param_step = 0.
self.set_vb_param(self.get_vb_param() + vb_step)
#Note: don't recompute everything here, wait until the next iteration when we have a new batch
self._set_params(self._untransform_params(self._get_params_transformed() + param_step), computations=False)
#print messages if desired
if i and (not i%print_interval):
print i, np.mean(self._ll_trace[-print_interval:]) #, self.log_likelihood()
print np.round(np.mean(self._grad_trace[-print_interval:],0),3)
sys.stdout.flush()
#callback
if i and not i%callback_interval:
callback(self) # Change this to callback()
time.sleep(0.01)
if self.epochs > 10:
self._adapt_steplength()
self.iterations += 1
def _adapt_steplength(self):
if self.adapt_vb_steplength:
# self._adaptive_vb_steplength()
self._adaptive_vb_steplength_KL()
self._vb_steplength_trace.append(self.vb_steplength)
assert self.vb_steplength > 0
if self.adapt_param_steplength:
self._adaptive_param_steplength()
# self._adaptive_param_steplength_log()
# self._adaptive_param_steplength_from_vb()
self._param_steplength_trace.append(self.param_steplength)
def _adaptive_param_steplength(self):
decr_factor = 0.02
g_tp = self._transform_gradients(self._log_likelihood_gradients())
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
new_param_steplength = np.dot(self.gbar_tp.T, self.gbar_tp) / self.hbar_tp
#- hack
new_param_steplength *= decr_factor
self.param_steplength = (self.param_steplength + new_param_steplength)/2
#-
self.tau_tp = self.tau_tp*(1-self.param_steplength) + 1
def _adaptive_param_steplength_log(self):
stp = np.logspace(np.log(0.0001), np.log(1e-6), base=np.e, num=18000)
self.param_steplength = stp[self.iterations]
def _adaptive_param_steplength_log2(self):
self.param_steplength = (self.iterations + 0.001)**-0.5
def _adaptive_param_steplength_from_vb(self):
self.param_steplength = self.vb_steplength * 0.01
def _adaptive_vb_steplength(self):
decr_factor = 0.1
g_t = self.vb_grad_natgrad()[0]
self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t)
new_vb_steplength = np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t
#- hack
new_vb_steplength *= decr_factor
self.vb_steplength = (self.vb_steplength + new_vb_steplength)/2
#-
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
def _adaptive_vb_steplength_KL(self):
decr_factor = 0.1
natgrad = self.vb_grad_natgrad()
g_t1 = natgrad[0]
g_t2 = natgrad[1]
self.gbar_t1 = (1-1/self.tau_t)*self.gbar_t1 + 1/self.tau_t * g_t1
self.gbar_t2 = (1-1/self.tau_t)*self.gbar_t2 + 1/self.tau_t * g_t2
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t1.T, g_t2)
self.vb_steplength = np.dot(self.gbar_t1.T, self.gbar_t2) / self.hbar_t
self.vb_steplength *= decr_factor
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
def _raw_predict(self, X_new, X_variance_new=None, which_parts='all',full_cov=False):
"""Internal helper function for making predictions, does not account for normalization"""
#TODO: make this more efficient!
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
if X_variance_new is None:
Kx = self.kern.K(X_new,self.Z)
mu = np.dot(Kx,self.Kmmi_m)
if full_cov:
Kxx = self.kern.K(X_new)
var = Kxx - mdot(Kx,tmp,Kx.T)
else:
Kxx = self.kern.Kdiag(X_new)
var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
return mu, var
else:
assert X_variance_new.shape == X_new.shape
Kx = self.kern.psi1(self.Z,X_new, X_variance_new)
mu = np.dot(Kx,self.Kmmi_m)
Kxx = self.kern.psi0(self.Z,X_new,X_variance_new)
psi2 = self.kern.psi2(self.Z,X_new,X_variance_new)
diag_var = Kxx - np.sum(np.sum(psi2*tmp[None,:,:],1),1)
if full_cov:
raise NotImplementedError
else:
return mu, diag_var[:,None]
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
if X_variance_new is not None:
X_variance_new = X_variance_new / self._Xscale ** 2
# here's the actual prediction by the GP model
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm
def set_vb_param(self,vb_param):
"""set the distribution q(u) from the canonical parameters"""
self.q_u_canonical_flat = vb_param.copy()
self.q_u_canonical = self.q_u_canonical_flat[:self.num_inducing*self.output_dim].reshape(self.num_inducing,self.output_dim),self.q_u_canonical_flat[self.num_inducing*self.output_dim:].reshape(self.num_inducing,self.num_inducing)
self.q_u_prec = -2.*self.q_u_canonical[1]
self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
self.q_u_Li = q_u_Li
self.q_u_logdet = -tmp
self.q_u_mean, _ = dpotrs(q_u_Li, np.asfortranarray(self.q_u_canonical[0]),lower=1)
self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.output_dim)
def get_vb_param(self):
"""
Return the canonical parameters of the distribution q(u)
"""
return self.q_u_canonical_flat
def plot(self, *args, **kwargs):
"""
See GPy.plotting.matplot_dep.svig_plots.plot
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import svig_plots
svig_plots.plot(self,*args,**kwargs)
def plot_traces(self):
"""
See GPy.plotting.matplot_dep.svig_plots.plot_traces
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import svig_plots
svig_plots.plot_traces(self)

View file

@ -4,6 +4,4 @@
import classification import classification
import regression import regression
import dimensionality_reduction import dimensionality_reduction
import tutorials
import stochastic
import non_gaussian import non_gaussian

View file

@ -28,13 +28,13 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing) m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing)
# Contrain all parameters to be positive # Contrain all parameters to be positive
m.tie_params('.*len') #m.tie_params('.*len')
m['.*len'] = 10. m['.*len'] = 10.
m.update_likelihood_approximation()
# Optimize # Optimize
if optimize: if optimize:
m.optimize(max_iters=max_iters) for _ in range(5):
m.optimize(max_iters=int(max_iters/5))
print(m) print(m)
#Test #Test
@ -150,7 +150,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
print m print m
return m return m
def toy_heaviside(seed=default_seed, optimize=True, plot=True): def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
""" """
Simple 1D classification example using a heavy side gp transformation Simple 1D classification example using a heavy side gp transformation
@ -166,16 +166,18 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
# Model definition # Model definition
noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside()) kernel = GPy.kern.RBF(1)
likelihood = GPy.likelihoods.EP(Y, noise_model) likelihood = GPy.likelihoods.Bernoulli(gp_link=GPy.likelihoods.link_functions.Heaviside())
m = GPy.models.GPClassification(data['X'], likelihood=likelihood) ep = GPy.inference.latent_function_inference.expectation_propagation.EP()
m = GPy.core.GP(X=data['X'], Y=Y, kernel=kernel, likelihood=likelihood, inference_method=ep, name='gp_classification_heaviside')
#m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
# Optimize # Optimize
if optimize: if optimize:
m.update_likelihood_approximation()
# Parameters optimization: # Parameters optimization:
m.optimize() for _ in range(5):
#m.pseudo_EM() m.optimize(max_iters=int(max_iters/5))
print m
# Plot # Plot
if plot: if plot:

View file

@ -59,7 +59,7 @@ def student_t_approx(optimize=True, plot=True):
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
laplace_inf = GPy.inference.latent_function_inference.Laplace() laplace_inf = GPy.inference.latent_function_inference.Laplace()
m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf) m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf)
m3['.*t_noise'].constrain_bounded(1e-6, 10.) m3['.*t_scale2'].constrain_bounded(1e-6, 10.)
m3['.*white'].constrain_fixed(1e-5) m3['.*white'].constrain_fixed(1e-5)
m3.randomize() m3.randomize()
@ -67,7 +67,7 @@ def student_t_approx(optimize=True, plot=True):
t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
laplace_inf = GPy.inference.latent_function_inference.Laplace() laplace_inf = GPy.inference.latent_function_inference.Laplace()
m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf) m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf)
m4['.*t_noise'].constrain_bounded(1e-6, 10.) m4['.*t_scale2'].constrain_bounded(1e-6, 10.)
m4['.*white'].constrain_fixed(1e-5) m4['.*white'].constrain_fixed(1e-5)
m4.randomize() m4.randomize()
print m4 print m4
@ -124,6 +124,7 @@ def student_t_approx(optimize=True, plot=True):
return m1, m2, m3, m4 return m1, m2, m3, m4
def boston_example(optimize=True, plot=True): def boston_example(optimize=True, plot=True):
raise NotImplementedError("Needs updating")
import sklearn import sklearn
from sklearn.cross_validation import KFold from sklearn.cross_validation import KFold
optimizer='bfgs' optimizer='bfgs'
@ -152,8 +153,8 @@ def boston_example(optimize=True, plot=True):
noise = 1e-1 #np.exp(-2) noise = 1e-1 #np.exp(-2)
rbf_len = 0.5 rbf_len = 0.5
data_axis_plot = 4 data_axis_plot = 4
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) kernelstu = GPy.kern.RBF(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) kernelgp = GPy.kern.RBF(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
#Baseline #Baseline
score_folds[0, n] = rmse(Y_test, np.mean(Y_train)) score_folds[0, n] = rmse(Y_test, np.mean(Y_train))
@ -162,8 +163,8 @@ def boston_example(optimize=True, plot=True):
print "Gauss GP" print "Gauss GP"
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
mgp.constrain_fixed('.*white', 1e-5) mgp.constrain_fixed('.*white', 1e-5)
mgp['rbf_len'] = rbf_len mgp['.*len'] = rbf_len
mgp['noise'] = noise mgp['.*noise'] = noise
print mgp print mgp
if optimize: if optimize:
mgp.optimize(optimizer=optimizer, messages=messages) mgp.optimize(optimizer=optimizer, messages=messages)
@ -198,9 +199,9 @@ def boston_example(optimize=True, plot=True):
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.constrain_fixed('.*white', 1e-5) mstu_t.constrain_fixed('.*white', 1e-5)
mstu_t.constrain_bounded('.*t_noise', 0.0001, 1000) mstu_t.constrain_bounded('.*t_scale2', 0.0001, 1000)
mstu_t['rbf_len'] = rbf_len mstu_t['rbf_len'] = rbf_len
mstu_t['.*t_noise'] = noise mstu_t['.*t_scale2'] = noise
print mstu_t print mstu_t
if optimize: if optimize:
mstu_t.optimize(optimizer=optimizer, messages=messages) mstu_t.optimize(optimizer=optimizer, messages=messages)

View file

@ -1,40 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
import pylab as pb
except:
pass
import numpy as np
import GPy
def toy_1d(optimize=True, plot=True):
N = 2000
M = 20
#create data
X = np.linspace(0,32,N)[:,None]
Z = np.linspace(0,32,M)[:,None]
Y = np.sin(X) + np.cos(0.3*X) + np.random.randn(*X.shape)/np.sqrt(50.)
m = GPy.models.SVIGPRegression(X,Y, batchsize=10, Z=Z)
m.constrain_bounded('noise_variance',1e-3,1e-1)
m.constrain_bounded('white_variance',1e-3,1e-1)
m.param_steplength = 1e-4
if plot:
fig = pb.figure()
ax = fig.add_subplot(111)
def cb(foo):
ax.cla()
m.plot(ax=ax,Z_height=-3)
ax.set_ylim(-3,3)
fig.canvas.draw()
if optimize:
m.optimize(500, callback=cb, callback_interval=1)
if plot:
m.plot_traces()
return m

View file

@ -1,156 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Code of Tutorials
"""
try:
import pylab as pb
pb.ion()
except:
pass
import numpy as np
import GPy
def tuto_GP_regression(optimize=True, plot=True):
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X) + np.random.randn(20,1)*0.05
kernel = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
m = GPy.models.GPRegression(X, Y, kernel)
print m
if plot:
m.plot()
m.constrain_positive('')
m.unconstrain('') # may be used to remove the previous constrains
m.constrain_positive('.*rbf_variance')
m.constrain_bounded('.*lengthscale',1.,10. )
m.constrain_fixed('.*noise',0.0025)
if optimize:
m.optimize()
m.optimize_restarts(num_restarts = 10)
#######################################################
#######################################################
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(50,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
# define kernel
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
# create simple GP model
m = GPy.models.GPRegression(X, Y, ker)
# contrain all parameters to be positive
m.constrain_positive('')
# optimize and plot
if optimize:
m.optimize('tnc', max_f_eval = 1000)
if plot:
m.plot()
print m
return(m)
def tuto_kernel_overview(optimize=True, plot=True):
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(input_dim=1, variance = .75, lengthscale=2.)
ker3 = GPy.kern.rbf(1, .5, .5)
print ker2
if plot:
ker1.plot()
ker2.plot()
ker3.plot()
k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels
k_prod = k1.prod(k2) # By default, tensor=False
k_prodtens = k1.prod(k2,tensor=True)
# Sum of kernels
k_add = k1.add(k2) # By default, tensor=False
k_addtens = k1.add(k2,tensor=True)
k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
k = k1 * k2 # equivalent to k = k1.prod(k2)
print k
# Simulate sample paths
X = np.linspace(-5,5,501)[:,None]
Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.Matern32(1)
k3 = GPy.kern.white(1)
k = k1 + k2 + k3
print k
k.constrain_positive('.*var')
k.constrain_fixed(np.array([1]),1.75)
k.tie_params('.*len')
k.unconstrain('white')
k.constrain_bounded('white',lower=1e-5,upper=.5)
print k
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
print Kanova
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(40,2))
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model
m = GPy.models.GPRegression(X, Y, Kanova)
if plot:
fig = pb.figure(figsize=(5,5))
ax = fig.add_subplot(111)
m.plot(ax=ax)
pb.figure(figsize=(20,3))
pb.subplots_adjust(wspace=0.5)
axs = pb.subplot(1,5,1)
m.plot(ax=axs)
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,3)
m.plot(ax=axs, which_parts=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,4)
m.plot(ax=axs, which_parts=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(ax=axs, which_parts=[False,False,False,True])
return(m)
def model_interaction(optimize=True, plot=True):
X = np.random.randn(20,1)
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
k = GPy.kern.rbf(1) + GPy.kern.bias(1)
m = GPy.models.GPRegression(X, Y, kernel=k)
return m

View file

@ -7,193 +7,392 @@ The package for the psi statistics computation
import numpy as np import numpy as np
def psicomputations(variance, lengthscale, Z, variational_posterior): try:
""" from scipy import weave
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = np.empty(mu.shape[0]) def _psicomputations(variance, lengthscale, Z, variational_posterior):
psi0[:] = variance """
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma) Z - MxQ
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma) mu - NxQ
return psi0, psi1, psi2 S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
def _psi1computations(variance, lengthscale, Z, mu, S, gamma): N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
""" l2 = np.square(lengthscale)
Z - MxQ log_denom1 = np.log(S/l2+1)
mu - NxQ log_denom2 = np.log(2*S/l2+1)
S - NxQ log_gamma = np.log(gamma)
gamma - NxQ log_gamma1 = np.log(1.-gamma)
""" variance = float(variance)
# here are the "statistics" for psi1 psi0 = np.empty(N)
# Produced intermediate results: psi0[:] = variance
# _psi1 NxM psi1 = np.empty((N,M))
psi2n = np.empty((N,M,M))
lengthscale2 = np.square(lengthscale) from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z)
# psi1 support_code = """
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ #include <math.h>
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ """
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ code = """
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ for(int n=0; n<N; n++) {
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ for(int m1=0;m1<M;m1++) {
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ double log_psi1=0;
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ for(int m2=0;m2<=m1;m2++) {
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) double log_psi2_n=0;
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ for(int q=0;q<Q;q++) {
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM double Snq = S(n,q);
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
return _psi1 if(m2==0) {
// Compute Psi_1
double muZ = mu(n,q)-Z(m1,q);
def _psi2computations(variance, lengthscale, Z, mu, S, gamma): double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
""" double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
Z - MxQ log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));
mu - NxQ }
S - NxQ // Compute Psi_2
gamma - NxQ double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
""" double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
# here are the "statistics" for psi2 double dZ = Zm1q - Zm2q;
# Produced intermediate results:
# _psi2 MxM
lengthscale2 = np.square(lengthscale) double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
}
double exp_psi2_n = exp(log_psi2_n);
psi2n(n,m1,m2) = variance*variance*exp_psi2_n;
if(m1!=m2) { psi2n(n,m2,m1) = variance*variance*exp_psi2_n;}
}
psi1(n,m1) = variance*exp(log_psi1);
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q psi2 = psi2n.sum(axis=0)
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q return psi0,psi1,psi2,psi2n
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2 from GPy.util.caching import Cacher
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ psicomputations = Cacher(_psicomputations, limit=1)
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
return _psi2 def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): _,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) mu = variational_posterior.mean
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) S = variational_posterior.variance
gamma = variational_posterior.binary_prob
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma = np.log(gamma)
log_gamma1 = np.log(1.-gamma)
variance = float(variance)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2 dvar = np.zeros(1)
dmu = np.zeros((N,Q))
dS = np.zeros((N,Q))
dgamma = np.zeros((N,Q))
dl = np.zeros(Q)
dZ = np.zeros((M,Q))
dvar += np.sum(dL_dpsi0)
dL_dlengscale = dl_psi1 + dl_psi2 from ....util.misc import param_to_array
if not ARD: S = param_to_array(S)
dL_dlengscale = dL_dlengscale.sum() mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z)
dL_dgamma = dgamma_psi1 + dgamma_psi2 support_code = """
dL_dmu = dmu_psi1 + dmu_psi2 #include <math.h>
dL_dS = dS_psi1 + dS_psi2 """
dL_dZ = dZ_psi1 + dZ_psi2 code = """
for(int n=0; n<N; n++) {
for(int m1=0;m1<M;m1++) {
double log_psi1=0;
for(int m2=0;m2<M;m2++) {
double log_psi2_n=0;
for(int q=0;q<Q;q++) {
double Snq = S(n,q);
double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
double gnq = gamma(n,q);
double mu_nq = mu(n,q);
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma if(m2==0) {
// Compute Psi_1
double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
if(q==0) {dvar(0) += lpsi1/variance;}
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma): double Zmu = Zm1q - mu_nq;
""" double denom = Snq+lq;
dL_dpsi1 - NxM double Zmu2_denom = Zmu*Zmu/denom;
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale) double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
# psi1 dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
_psi1_denom = S / lengthscale2 + 1. # NxQ dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ }
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ // Compute Psi_2
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2) if(q==0) {dvar(0) += lpsi2*2/variance;}
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma double dZm1m2 = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma): double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
""" double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
Z - MxQ double d_exp1,d_exp2;
mu - NxQ if(exp1>exp2) {
S - NxQ d_exp1 = 1.;
gamma - NxQ d_exp2 = exp(exp2-exp1);
dL_dpsi2 - MxM } else {
""" d_exp1 = exp(exp1-exp2);
# here are the "statistics" for psi2 d_exp2 = 1.;
# Produced the derivatives w.r.t. psi2: }
# _dL_dvariance 1 double exp_sum = d_exp1+d_exp2;
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale) dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
}
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q dl *= 2.*lengthscale
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q if not ARD:
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q dl = dl.sum()
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2 return dvar, dl, dZ, dmu, dS, dgamma
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma except:
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
return _psi1
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma

View file

@ -4,7 +4,6 @@
from gp_regression import GPRegression from gp_regression import GPRegression
from gp_classification import GPClassification from gp_classification import GPClassification
from sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput from sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput
from svigp_regression import SVIGPRegression
from sparse_gp_classification import SparseGPClassification from sparse_gp_classification import SparseGPClassification
from gplvm import GPLVM from gplvm import GPLVM
from bcgplvm import BCGPLVM from bcgplvm import BCGPLVM

View file

@ -25,8 +25,6 @@ class BayesianGPLVM(SparseGP_MPI):
Z=None, kernel=None, inference_method=None, likelihood=None, Z=None, kernel=None, inference_method=None, likelihood=None,
name='bayesian gplvm', mpi_comm=None, normalizer=None, name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1): missing_data=False, stochastic=False, batchsize=1):
self.mpi_comm = mpi_comm
self.__IN_OPTIMIZATION__ = False
self.logger = logging.getLogger(self.__class__.__name__) self.logger = logging.getLogger(self.__class__.__name__)
if X is None: if X is None:

View file

@ -9,6 +9,7 @@ from .. import likelihoods
from .. import kern from .. import kern
from ..inference.latent_function_inference import VarDTC from ..inference.latent_function_inference import VarDTC
from ..core.parameterization.variational import NormalPosterior from ..core.parameterization.variational import NormalPosterior
from GPy.inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
class SparseGPRegression(SparseGP_MPI): class SparseGPRegression(SparseGP_MPI):
""" """
@ -30,7 +31,7 @@ class SparseGPRegression(SparseGP_MPI):
""" """
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None): def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, X_variance=None, normalizer=None, mpi_comm=None):
num_data, input_dim = X.shape num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability) # kern defaults to rbf (plus white for stability)
@ -49,7 +50,13 @@ class SparseGPRegression(SparseGP_MPI):
if not (X_variance is None): if not (X_variance is None):
X = NormalPosterior(X,X_variance) X = NormalPosterior(X,X_variance)
SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=VarDTC(), normalizer=normalizer) if mpi_comm is not None:
from ..inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
infr = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
infr = VarDTC()
SparseGP_MPI.__init__(self, X, Y, Z, kernel, likelihood, inference_method=infr, normalizer=normalizer, mpi_comm=mpi_comm)
def parameters_changed(self): def parameters_changed(self):
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients_sparsegp,VarDTC_minibatch from ..inference.latent_function_inference.var_dtc_parallel import update_gradients_sparsegp,VarDTC_minibatch

View file

@ -1,45 +0,0 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ..core import SVIGP
from .. import likelihoods
from .. import kern
class SVIGPRegression(SVIGP):
"""
Gaussian Process model for regression
This is a thin wrapper around the SVIGP class, with a set of sensible defalts
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, Y, kernel=None, Z=None, num_inducing=10, q_u=None, batchsize=10, normalize_Y=False):
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1], variance=1., lengthscale=4.) + kern.white(X.shape[1], 1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy()
else:
assert Z.shape[1] == X.shape[1]
# likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
SVIGP.__init__(self, X, likelihood, kernel, Z, q_u=q_u, batchsize=batchsize)
self.load_batch()

View file

@ -6,7 +6,6 @@ import models_plots
import priors_plots import priors_plots
import variational_plots import variational_plots
import kernel_plots import kernel_plots
import svig_plots
import dim_reduction_plots import dim_reduction_plots
import mapping_plots import mapping_plots
import Tango import Tango

View file

@ -21,6 +21,7 @@ comm = MPI.COMM_WORLD
N = 100 N = 100
x = np.linspace(-6., 6., N) x = np.linspace(-6., 6., N)
y = np.sin(x) + np.random.randn(N) * 0.05 y = np.sin(x) + np.random.randn(N) * 0.05
comm.Bcast(y)
data = np.vstack([x,y]) data = np.vstack([x,y])
infr = GPy.inference.latent_function_inference.VarDTC_minibatch(mpi_comm=comm) infr = GPy.inference.latent_function_inference.VarDTC_minibatch(mpi_comm=comm)
m = GPy.models.BayesianGPLVM(data.T,1,mpi_comm=comm) m = GPy.models.BayesianGPLVM(data.T,1,mpi_comm=comm)
@ -39,10 +40,44 @@ if comm.rank==0:
(stdout, stderr) = p.communicate() (stdout, stderr) = p.communicate()
L1 = float(stdout.splitlines()[-2]) L1 = float(stdout.splitlines()[-2])
L2 = float(stdout.splitlines()[-1]) L2 = float(stdout.splitlines()[-1])
self.assertAlmostEqual(L1, L2) self.assertTrue(np.allclose(L1,L2))
import os import os
os.remove('mpi_test__.py') os.remove('mpi_test__.py')
def test_SparseGPRegression_MPI(self):
code = """
import numpy as np
import GPy
from mpi4py import MPI
np.random.seed(123456)
comm = MPI.COMM_WORLD
N = 100
x = np.linspace(-6., 6., N)
y = np.sin(x) + np.random.randn(N) * 0.05
comm.Bcast(y)
data = np.vstack([x,y])
#infr = GPy.inference.latent_function_inference.VarDTC_minibatch(mpi_comm=comm)
m = GPy.models.SparseGPRegression(data[:1].T,data[1:2].T,mpi_comm=comm)
m.optimize(max_iters=10)
if comm.rank==0:
print float(m.objective_function())
m.inference_method.mpi_comm=None
m.mpi_comm=None
m._trigger_params_changed()
print float(m.objective_function())
"""
with open('mpi_test__.py','w') as f:
f.write(code)
f.close()
p = subprocess.Popen('mpirun -n 4 python mpi_test__.py',stdout=subprocess.PIPE,shell=True)
(stdout, stderr) = p.communicate()
L1 = float(stdout.splitlines()[-2])
L2 = float(stdout.splitlines()[-1])
self.assertTrue(np.allclose(L1,L2))
import os
os.remove('mpi_test__.py')
except: except:
pass pass

View file

@ -5,23 +5,28 @@
Welcome to GPy's documentation! Welcome to GPy's documentation!
=============================== ===============================
For a quick start, you can have a look at one of the tutorials:
* `Basic Gaussian process regression <tuto_GP_regression.html>`_ `GPy <http://sheffieldml.github.io/GPy/>`_ is a Gaussian Process (GP) framework written in Python, from the Sheffield machine learning group.
* `Interacting with models <tuto_interacting_with_models.html>`_
* `A kernel overview <tuto_kernel_overview.html>`_
* `Writing new kernels <tuto_creating_new_kernels.html>`_
* `Writing new models <tuto_creating_new_models.html>`_
* `Parameterization handles <tuto_parameterized.html>`_
You may also be interested by some examples in the GPy/examples folder. The `GPy homepage <http://sheffieldml.github.io/GPy/>`_ contains tutorials for users and further information on the project, including installation instructions.
This documentation is mostly aimed at developers interacting closely with the code-base.
The code can be found on our `Github project page <https://github.com/SheffieldML/GPy>`_. It is open source and provided under the BSD license.
.. * `Basic Gaussian process regression <tuto_GP_regression.html>`_
.. * `Interacting with models <tuto_interacting_with_models.html>`_
.. * `A kernel overview <tuto_kernel_overview.html>`_
.. * `Writing new kernels <tuto_creating_new_kernels.html>`_
.. * `Writing new models <tuto_creating_new_models.html>`_
.. * `Parameterization handles <tuto_parameterized.html>`_
.. You may also be interested by some examples in the GPy/examples folder.
Contents: Contents:
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 2
installation
GPy GPy

View file

@ -1,7 +1,7 @@
/home/alans/Work/GPy/GPy/__init__.py:docstring of GPy.load:1: WARNING: Inline interpreted text or phrase reference start-string without end-string. /home/alans/Work/GPy/GPy/__init__.py:docstring of GPy.load:1: WARNING: Inline interpreted text or phrase reference start-string without end-string.
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP:7: WARNING: Field list ends without a blank line; unexpected unindent. /home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP.optimize:8: ERROR: Unknown interpreted text role "module".
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP:10: ERROR: Unexpected indentation.
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP.predictive_gradients:5: ERROR: Unexpected indentation. /home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP.predictive_gradients:5: ERROR: Unexpected indentation.
/home/alans/Work/GPy/GPy/core/gp.py:docstring of GPy.core.gp.GP.predictive_gradients:8: WARNING: Block quote ends without a blank line; unexpected unindent.
/home/alans/Work/GPy/GPy/core/model.py:docstring of GPy.core.model.Model.optimize_restarts:29: WARNING: Explicit markup ends without a blank line; unexpected unindent. /home/alans/Work/GPy/GPy/core/model.py:docstring of GPy.core.model.Model.optimize_restarts:29: WARNING: Explicit markup ends without a blank line; unexpected unindent.
/home/alans/Work/GPy/doc/GPy.core.rst:65: WARNING: autodoc: failed to import module u'GPy.core.symbolic'; the following exception was raised: /home/alans/Work/GPy/doc/GPy.core.rst:65: WARNING: autodoc: failed to import module u'GPy.core.symbolic'; the following exception was raised:
Traceback (most recent call last): Traceback (most recent call last):
@ -23,31 +23,6 @@ ImportError: No module named lambdify
/home/alans/Work/GPy/GPy/core/parameterization/ties_and_remappings.py:docstring of GPy.core.parameterization.ties_and_remappings.Tie:18: SEVERE: Unexpected section title or transition. /home/alans/Work/GPy/GPy/core/parameterization/ties_and_remappings.py:docstring of GPy.core.parameterization.ties_and_remappings.Tie:18: SEVERE: Unexpected section title or transition.
================================ ================================
/home/alans/Work/GPy/doc/GPy.examples.rst:18: WARNING: autodoc: failed to import module u'GPy.examples.coreg_example'; the following exception was raised:
Traceback (most recent call last):
File "/home/alans/anaconda/envs/GPy/lib/python2.7/site-packages/sphinx/ext/autodoc.py", line 335, in import_object
__import__(self.modname)
File "/home/alans/Work/GPy/GPy/examples/coreg_example.py", line 44, in <module>
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], Z_list = [Z1], kernel=kern)
File "/home/alans/Work/GPy/GPy/core/parameterization/parameterized.py", line 19, in __call__
self = super(ParametersChangedMeta, self).__call__(*args, **kw)
File "/home/alans/Work/GPy/GPy/models/sparse_gp_coregionalized_regression.py", line 66, in __init__
self['.*inducing'][:,-1].fix()
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 444, in constrain_fixed
self.notify_observers(self, None if trigger_parent else -np.inf)
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 148, in notify_observers
[callble(self, which=which) for _, _, callble in self.observers]
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 1077, in _pass_through_notify_observers
self.notify_observers(which=which)
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 148, in notify_observers
[callble(self, which=which) for _, _, callble in self.observers]
File "/home/alans/Work/GPy/GPy/core/parameterization/parameter_core.py", line 1075, in _parameters_changed_notification
self.parameters_changed()
File "/home/alans/Work/GPy/GPy/core/sparse_gp.py", line 66, in parameters_changed
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
File "/home/alans/Work/GPy/GPy/inference/latent_function_inference/var_dtc.py", line 170, in inference
import ipdb; ipdb.set_trace()
ImportError: No module named ipdb
/home/alans/Work/GPy/GPy/kern/_src/coregionalize.py:docstring of GPy.kern._src.coregionalize.Coregionalize:5: ERROR: Unexpected indentation. /home/alans/Work/GPy/GPy/kern/_src/coregionalize.py:docstring of GPy.kern._src.coregionalize.Coregionalize:5: ERROR: Unexpected indentation.
/home/alans/Work/GPy/doc/GPy.kern._src.rst:73: WARNING: autodoc: failed to import module u'GPy.kern._src.hierarchical'; the following exception was raised: /home/alans/Work/GPy/doc/GPy.kern._src.rst:73: WARNING: autodoc: failed to import module u'GPy.kern._src.hierarchical'; the following exception was raised:
Traceback (most recent call last): Traceback (most recent call last):
@ -187,6 +162,7 @@ Parameter handles
:py:class:`~GPy.core.parameterization.param.Param` :py:class:`~GPy.core.parameterization.param.Param`
=========== ===========
/home/alans/Work/GPy/doc/installation.rst:: WARNING: document isn't included in any toctree
/home/alans/Work/GPy/doc/kernel_implementation.rst:: WARNING: document isn't included in any toctree /home/alans/Work/GPy/doc/kernel_implementation.rst:: WARNING: document isn't included in any toctree
/home/alans/Work/GPy/doc/modules.rst:: WARNING: document isn't included in any toctree /home/alans/Work/GPy/doc/modules.rst:: WARNING: document isn't included in any toctree
/home/alans/Work/GPy/doc/tuto_GP_regression.rst:: WARNING: document isn't included in any toctree /home/alans/Work/GPy/doc/tuto_GP_regression.rst:: WARNING: document isn't included in any toctree