Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-13 05:52:38 +02:00)

Commit b7bf712f48: Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel
Conflicts: GPy/models/GPLVM.py

35 changed files with 723 additions and 657 deletions
@@ -10,6 +10,11 @@ virtualenv:
 before_install:
 - sudo apt-get install -qq python-scipy python-pip
 - sudo apt-get install -qq python-matplotlib
+# Workaround for a permissions issue with Travis virtual machine images
+# that breaks Python's multiprocessing:
+# https://github.com/travis-ci/travis-cookbooks/issues/155
+- sudo rm -rf /dev/shm
+- sudo ln -s /run/shm /dev/shm
 install:
 - pip install --upgrade numpy==1.7.1
@@ -2,16 +2,17 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-import kern
+import core
 import models
 import inference
 import util
 import examples
-from core import priors
 import likelihoods
 import testing
 from numpy.testing import Tester
 from nose.tools import nottest
+import kern
+from core import priors

 @nottest
 def tests():
GPy/core/GP.py (new file, 150 lines)
@@ -0,0 +1,150 @@

# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


import numpy as np
from scipy import linalg
import pylab as pb
from .. import kern
from ..util.linalg import pdinv, mdot, tdot
#from ..util.plot import gpplot, Tango
from ..likelihoods import EP
from gp_base import GPBase


class GP(GPBase):
    """
    Gaussian Process model for regression and EP

    :param X: input observations
    :param kernel: a GPy kernel, defaults to rbf+white
    :param likelihood: a GPy likelihood
    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
    :type normalize_X: False|True
    :rtype: model object
    :param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
    :param powerep: power-EP parameters [$\eta$, $\delta$], defaults to [1., 1.]
    :type powerep: list

    .. Note:: Multiple independent outputs are allowed using columns of Y

    """
    def __init__(self, X, likelihood, kernel, normalize_X=False):
        GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
        self._set_params(self._get_params())

    def _set_params(self, p):
        self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
        self.likelihood._set_params(p[self.kern.Nparam_transformed():])

        self.K = self.kern.K(self.X)
        self.K += self.likelihood.covariance_matrix

        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)

        # the gradient of the likelihood wrt the covariance matrix
        if self.likelihood.YYT is None:
            # alpha = np.dot(self.Ki, self.likelihood.Y)
            alpha, _ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y, lower=1)
            self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki)
        else:
            # tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
            tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
            tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
            self.dL_dK = 0.5 * (tmp - self.D * self.Ki)

    def _get_params(self):
        return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))

    def _get_param_names(self):
        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

    def update_likelihood_approximation(self):
        """
        Approximates a non-Gaussian likelihood using Expectation Propagation.

        For a Gaussian likelihood, no iteration is required:
        this function does nothing.
        """
        self.likelihood.fit_full(self.kern.K(self.X))
        self._set_params(self._get_params())  # update the GP

    def _model_fit_term(self):
        """
        Computes the model fit using YYT if it's available.
        """
        if self.likelihood.YYT is None:
            tmp, _ = linalg.lapack.flapack.dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
            return -0.5 * np.sum(np.square(tmp))
            # return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
        else:
            return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))

    def log_likelihood(self):
        """
        The log marginal likelihood of the GP.

        For an EP model, this can be written as the log likelihood of a regression
        model for a new variable Y* = v_tilde/tau_tilde, with a covariance
        matrix K* = K + diag(1./tau_tilde), plus a normalization term.
        """
        return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z

    def _log_likelihood_gradients(self):
        """
        The gradient of all parameters.

        Note, we use the chain rule: dL_dtheta = dL_dK * dK_dtheta
        """
        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))

    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
        """
        Internal helper function for making predictions; does not account
        for normalization or likelihood.
        """
        Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
        # KiKx = np.dot(self.Ki, Kx)
        KiKx, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(Kx), lower=1)
        mu = np.dot(KiKx.T, self.likelihood.Y)
        if full_cov:
            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
            var = Kxx - np.dot(KiKx.T, Kx)
        else:
            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
            var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
            var = var[:, None]
        if stop:
            debug_this
        return mu, var

    def predict(self, Xnew, which_parts='all', full_cov=False):
        """
        Predict the function(s) at the new point(s) Xnew.

        Arguments
        ---------
        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.Q
        :param which_parts: specifies which outputs kernel(s) to use in prediction
        :type which_parts: ('all', list of bools)
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: bool
        :rtype: posterior mean, a Numpy array, Nnew x self.D
        :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
        :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D

        If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
        This is to allow for different normalizations of the output dimensions.
        """
        # normalize X values
        Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)

        # now push through likelihood
        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)

        return mean, var, _025pm, _975pm
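For reference, _raw_predict above implements the standard GP regression identities mu = Kx^T K^{-1} y and var = Kxx - Kx^T K^{-1} Kx, with the linear solve done through the Cholesky factor (dpotrs) rather than an explicit inverse. A minimal, self-contained sketch of the same computation in plain NumPy/SciPy; the toy RBF kernel and data here are invented for illustration and this is not the GPy API:

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def rbf(A, B, variance=1.0, lengthscale=0.5):
    # toy RBF kernel, standing in for self.kern.K(...)
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * d2 / lengthscale ** 2)

X = np.linspace(0, 1, 20)[:, None]            # training inputs
Y = np.sin(6 * X) + 0.1 * np.random.randn(20, 1)
noise = 0.01

K = rbf(X, X) + noise * np.eye(20)            # K + likelihood covariance, as in _set_params
L = cho_factor(K, lower=True)                 # plays the role of self.L

Xnew = np.linspace(0, 1, 50)[:, None]
Kx = rbf(X, Xnew)                             # K(X, Xnew), i.e. Kx in _raw_predict
KiKx = cho_solve(L, Kx)                       # K^{-1} Kx via the Cholesky factor, like dpotrs

mu = KiKx.T.dot(Y)                            # posterior mean
var = rbf(Xnew, Xnew) - KiKx.T.dot(Kx)        # full posterior covariance (full_cov=True)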
@@ -1,7 +1,8 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

+from GP import GP
+from sparse_GP import sparse_GP
 from model import *
 from parameterised import *
 import priors
GPy/core/gp_base.py (new file, 127 lines)
@@ -0,0 +1,127 @@

import numpy as np
import model
from .. import kern
from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
import pylab as pb


class GPBase(model.model):
    """
    Gaussian Process model for holding shared behaviour between
    sparse_GP and GP models
    """

    def __init__(self, X, likelihood, kernel, normalize_X=False):
        self.X = X
        assert len(self.X.shape) == 2
        self.N, self.Q = self.X.shape
        assert isinstance(kernel, kern.kern)
        self.kern = kernel
        self.likelihood = likelihood
        assert self.X.shape[0] == self.likelihood.data.shape[0]
        self.N, self.D = self.likelihood.data.shape

        if normalize_X:
            self._Xmean = X.mean(0)[None, :]
            self._Xstd = X.std(0)[None, :]
            self.X = (X.copy() - self._Xmean) / self._Xstd
        else:
            self._Xmean = np.zeros((1, self.Q))
            self._Xstd = np.ones((1, self.Q))

        model.model.__init__(self)
        # All leaf nodes should call self._set_params(self._get_params()) at
        # the end

    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
        """
        Plot the GP's view of the world, where the data is normalized and the
        likelihood is Gaussian.

        :param samples: the number of a posteriori samples to plot
        :param which_data: which of the training data to plot (default all)
        :type which_data: 'all' or a slice object to slice self.X, self.Y
        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
        :param which_parts: which of the kernel functions to plot (additively)
        :type which_parts: 'all', or list of bools
        :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D

        Plot the posterior of the GP.
          - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
          - In two dimensions, a contour-plot shows the mean predicted function.
          - In higher dimensions, we've not implemented this yet !TODO!

        Can plot only part of the data and part of the posterior functions
        using which_data and which_functions.
        """
        if which_data == 'all':
            which_data = slice(None)

        if self.X.shape[1] == 1:
            Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
            if samples == 0:
                m, v = self._raw_predict(Xnew, which_parts=which_parts)
                gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v))
                pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
            else:
                m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
                Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
                gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None])
                for i in range(samples):
                    pb.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
                pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
            pb.xlim(xmin, xmax)
            ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
            pb.ylim(ymin, ymax)

        elif self.X.shape[1] == 2:
            resolution = resolution or 50
            Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
            m, v = self._raw_predict(Xnew, which_parts=which_parts)
            m = m.reshape(resolution, resolution).T
            pb.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
            pb.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
            pb.xlim(xmin[0], xmax[0])
            pb.ylim(xmin[1], xmax[1])
        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20):
        """
        TODO: Docstrings!
        :param levels: for 2D plotting, the number of contour levels to use
        """
        # TODO include samples
        if which_data == 'all':
            which_data = slice(None)

        if self.X.shape[1] == 1:
            Xu = self.X * self._Xstd + self._Xmean  # NOTE self.X are the normalized values now

            Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
            m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
            for d in range(m.shape[1]):
                gpplot(Xnew, m[:, d], lower[:, d], upper[:, d])
                pb.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
            ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
            ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
            pb.xlim(xmin, xmax)
            pb.ylim(ymin, ymax)

        elif self.X.shape[1] == 2:  # FIXME
            resolution = resolution or 50
            Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
            x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
            m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
            m = m.reshape(resolution, resolution).T
            pb.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
            Yf = self.likelihood.Y.flatten()
            pb.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
            pb.xlim(xmin[0], xmax[0])
            pb.ylim(xmin[1], xmax[1])

        else:
            raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
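The samples > 0 branch of plot_f draws joint samples from the posterior: it asks _raw_predict for the full covariance over the plotting grid and samples a multivariate normal, while the shaded band only needs the diagonal. A minimal sketch of that idea with a toy mean and covariance (NumPy only, illustrative values, not GPy calls):

import numpy as np

# toy posterior over 50 test points: mean m (50,) and full covariance v (50, 50)
t = np.linspace(0, 1, 50)
m = np.sin(6 * t)
v = 0.05 * np.exp(-0.5 * (t[:, None] - t[None, :]) ** 2 / 0.1 ** 2) + 1e-8 * np.eye(50)

samples = 10
# joint draws from N(m, v): each row is one sampled function over the grid,
# which is what plot_f passes to pb.plot as thin sample paths
Ysim = np.random.multivariate_normal(m, v, samples)

# the shaded two-standard-deviation band uses only the diagonal of v
lower, upper = m - 2 * np.sqrt(np.diag(v)), m + 2 * np.sqrt(np.diag(v))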
@@ -14,16 +14,17 @@ import priors
 import re
 import sys
 import pdb
+from GPy.core.domains import POSITIVE, REAL
 # import numdifftools as ndt

 class model(parameterised):
     def __init__(self):
         parameterised.__init__(self)
-        self.priors = [None for i in range(self._get_params().size)]
+        self.priors = None
         self.optimization_runs = []
         self.sampling_runs = []
-        self._set_params(self._get_params())
         self.preferred_optimizer = 'tnc'
+        #self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
     def _get_params(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def _set_params(self, x):

@@ -33,13 +34,13 @@ class model(parameterised):
     def _log_likelihood_gradients(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"

-    def set_prior(self, which, what):
+    def set_prior(self, regexp, what):
         """
         Sets priors on the model parameters.

         Arguments
         ---------
-        which -- string, regexp, or integer array
+        regexp -- string, regexp, or integer array
         what -- instance of a prior class

         Notes

@@ -51,8 +52,10 @@ class model(parameterised):
         For tied parameters, the prior will only be "counted" once, thus
         a prior object is only inserted on the first tied index
         """
+        if self.priors is None:
+            self.priors = [None for i in range(self._get_params().size)]
+
-        which = self.grep_param_names(which)
+        which = self.grep_param_names(regexp)

         # check tied situation
         tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]

@@ -66,8 +69,9 @@ class model(parameterised):

         # check constraints are okay
-        if isinstance(what, (priors.gamma, priors.inverse_gamma, priors.log_Gaussian)):
-            constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == 'positive']
+        if what.domain is POSITIVE:
+            constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == POSITIVE]
             if len(constrained_positive_indices):
                 constrained_positive_indices = np.hstack(constrained_positive_indices)
             else:

@@ -80,7 +84,7 @@ class model(parameterised):
             print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
             print '\n'
             self.constrain_positive(unconst)
-        elif isinstance(what, priors.Gaussian):
+        elif what.domain is REAL:
             assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
         else:
             raise ValueError, "prior not recognised"
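The hunks above replace isinstance checks against concrete prior classes with a check on the new domain attribute (the POSITIVE and REAL constants imported from GPy.core.domains). A minimal sketch of that dispatch pattern, with stand-in constants and classes invented for illustration rather than the real GPy classes:

# stand-in constants; in the diff they come from GPy.core.domains
POSITIVE, REAL = 'positive', 'real'

class gamma_prior(object):
    domain = POSITIVE      # support is (0, inf), so the parameter should be positively constrained

class gaussian_prior(object):
    domain = REAL          # support is all of R

def prior_is_compatible(prior, param_is_positively_constrained):
    # dispatch on the declared support instead of on the concrete class,
    # mirroring `if what.domain is POSITIVE: ... elif what.domain is REAL: ...`
    if prior.domain is POSITIVE:
        return param_is_positively_constrained
    elif prior.domain is REAL:
        return not param_is_positively_constrained
    raise ValueError("prior not recognised")

assert prior_is_compatible(gamma_prior(), True)
assert prior_is_compatible(gaussian_prior(), False)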
@@ -104,10 +108,15 @@ class model(parameterised):

     def log_prior(self):
         """evaluate the prior"""
-        return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
+        if self.priors is not None:
+            return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
+        else:
+            return 0.

     def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
+        if self.priors is None:
+            return 0.
         x = self._get_params()
         ret = np.zeros(x.size)
         [np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]

@@ -135,7 +144,8 @@ class model(parameterised):
         self._set_params_transformed(x)
         # now draw from prior where possible
         x = self._get_params()
-        [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
+        if self.priors is not None:
+            [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
         self._set_params(x)
         self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)

@@ -212,7 +222,7 @@ class model(parameterised):
         currently_constrained = self.all_constrained_indices()
         to_make_positive = []
         for s in positive_strings:
-            for i in self.grep_param_names(s):
+            for i in self.grep_param_names(".*" + s):
                 if not (i in currently_constrained):
                     #to_make_positive.append(re.escape(param_names[i]))
                     to_make_positive.append(i)

@@ -234,16 +244,13 @@ class model(parameterised):
         Gets the gradients from the likelihood and the priors.
         """
         self._set_params_transformed(x)
-        LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
-        prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        return -LL_gradients - prior_gradients
+        obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
+        return obj_grads

     def objective_and_gradients(self, x):
         self._set_params_transformed(x)
         obj_f = -self.log_likelihood() - self.log_prior()
-        LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
-        prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        obj_grads = -LL_gradients - prior_gradients
+        obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
         return obj_f, obj_grads

     def optimize(self, optimizer=None, start=None, **kwargs):
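The last hunk folds two _transform_gradients calls into one. That is equivalent whenever the gradient transformation is linear in its argument (it rescales each entry by a chain-rule factor), which the following toy check makes concrete; transform_gradients here is a stand-in, not GPy's actual method:

import numpy as np

def transform_gradients(g, jacobian_diag):
    # stand-in for model._transform_gradients: chain-rule scaling of each
    # gradient entry by d(param)/d(free_param), assumed elementwise and linear in g
    return g * jacobian_diag

dLL = np.array([0.3, -1.2, 2.0])      # likelihood gradients (toy values)
dprior = np.array([0.1, 0.0, -0.5])   # prior gradients (toy values)
jac = np.array([1.0, 0.7, 2.5])       # per-parameter chain-rule factors (toy values)

old_style = -transform_gradients(dLL, jac) - transform_gradients(dprior, jac)
new_style = -transform_gradients(dLL + dprior, jac)
assert np.allclose(old_style, new_style)   # same objective gradients, one call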
@@ -64,21 +64,21 @@ class parameterised(object):
        m['var'] = 2. # > sets all parameters matching 'var' to 2.
        m['var'] = <array-like> # > sets parameters matching 'var' to <array-like>
    """
-    def get(self, name):
+    def get(self, regexp):
         warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
-        return self[name]
+        return self[regexp]

-    def set(self, name, val):
+    def set(self, regexp, val):
         warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
-        self[name] = val
+        self[regexp] = val

-    def __getitem__(self, name, return_names=False):
+    def __getitem__(self, regexp, return_names=False):
         """
         Get a model parameter by name. The name is applied as a regular
         expression and all parameters that match that regular expression are
         returned.
         """
-        matches = self.grep_param_names(name)
+        matches = self.grep_param_names(regexp)
         if len(matches):
             if return_names:
                 return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()

@@ -103,8 +103,8 @@ class parameterised(object):
         else:
             raise AttributeError, "no parameter matches %s" % name

-    def tie_params(self, which):
-        matches = self.grep_param_names(which)
+    def tie_params(self, regexp):
+        matches = self.grep_param_names(regexp)
         assert matches.size > 0, "need at least something to tie together"
         if len(self.tied_indices):
             assert not np.any(matches[:, None] == np.hstack(self.tied_indices)), "Some indices are already tied!"

@@ -119,28 +119,23 @@ class parameterised(object):
         """Unties all parameters by setting tied_indices to an empty list."""
         self.tied_indices = []

-    def grep_param_names(self, expr):
+    def grep_param_names(self, regexp):
         """
-        Arguments
-        ---------
-        expr -- can be a regular expression object or a string to be turned into regular expression object.
-
-        Returns
-        -------
-        the indices of self._get_param_names which match the regular expression.
-
-        Notes
-        -----
-        Other objects are passed through - i.e. integers which weren't meant for grepping
+        :param regexp: regular expression to select parameter names
+        :type regexp: re | str | int
+        :rtype: the indices of self._get_param_names which match the regular expression.
+
+        Note:-
+        Other objects are passed through - i.e. integers which weren't meant for grepping
         """

-        if type(expr) in [str, np.string_, np.str]:
-            expr = re.compile(expr)
-            return np.nonzero([expr.search(name) for name in self._get_param_names()])[0]
-        elif type(expr) is re._pattern_type:
-            return np.nonzero([expr.search(name) for name in self._get_param_names()])[0]
+        if type(regexp) in [str, np.string_, np.str]:
+            regexp = re.compile(regexp)
+            return np.nonzero([regexp.match(name) for name in self._get_param_names()])[0]
+        elif type(regexp) is re._pattern_type:
+            return np.nonzero([regexp.match(name) for name in self._get_param_names()])[0]
         else:
-            return expr
+            return regexp

     def Nparam_transformed(self):
         removed = 0
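grep_param_names now uses re.match, which anchors the pattern at the start of the parameter name, instead of re.search, which matches anywhere. This is also why ensure_default_constraints above now prepends ".*" to its patterns. A quick standalone illustration of the difference, using made-up parameter names:

import re
import numpy as np

param_names = ['rbf_variance', 'rbf_lengthscale', 'white_variance', 'noise_variance']

def grep(pattern, matcher):
    # mimic grep_param_names: indices of names the pattern hits
    rx = re.compile(pattern)
    return np.nonzero([matcher(rx, name) for name in param_names])[0]

print(grep('variance', lambda rx, n: rx.search(n)))   # [0 2 3]: matches anywhere in the name
print(grep('variance', lambda rx, n: rx.match(n)))    # []: must match from the start
print(grep('.*variance', lambda rx, n: rx.match(n)))  # [0 2 3]: '.*' restores the old behaviour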
@@ -152,9 +147,9 @@ class parameterised(object):

         return len(self._get_params()) - removed

-    def unconstrain(self, which):
+    def unconstrain(self, regexp):
         """Unconstrain matching parameters. does not untie parameters"""
-        matches = self.grep_param_names(which)
+        matches = self.grep_param_names(regexp)

         #tranformed contraints:
         for match in matches:

@@ -178,17 +173,17 @@ class parameterised(object):
         else:
             self.fixed_indices, self.fixed_values = [], []

-    def constrain_negative(self, which):
+    def constrain_negative(self, regexp):
         """ Set negative constraints. """
-        self.constrain(which, transformations.negative_exponent())
+        self.constrain(regexp, transformations.negative_exponent())

-    def constrain_positive(self, which):
+    def constrain_positive(self, regexp):
         """ Set positive constraints. """
-        self.constrain(which, transformations.logexp())
+        self.constrain(regexp, transformations.logexp())

-    def constrain_bounded(self, which, lower, upper):
+    def constrain_bounded(self, regexp, lower, upper):
         """ Set bounded constraints. """
-        self.constrain(which, transformations.logistic(lower, upper))
+        self.constrain(regexp, transformations.logistic(lower, upper))

     def all_constrained_indices(self):
         if len(self.constrained_indices) or len(self.fixed_indices):

@@ -196,10 +191,10 @@ class parameterised(object):
         else:
             return np.empty(shape=(0,))

-    def constrain(self, which, transform):
+    def constrain(self, regexp, transform):
         assert isinstance(transform, transformations.transformation)

-        matches = self.grep_param_names(which)
+        matches = self.grep_param_names(regexp)
         overlap = set(matches).intersection(set(self.all_constrained_indices()))
         if overlap:
             self.unconstrain(np.asarray(list(overlap)))

@@ -214,11 +209,11 @@ class parameterised(object):
         x[matches] = transform.initialize(x[matches])
         self._set_params(x)

-    def constrain_fixed(self, which, value=None):
+    def constrain_fixed(self, regexp, value=None):
         """
         Arguments
         ---------
-        :param which: np.array(dtype=int), or regular expression object or string
+        :param regexp: np.array(dtype=int), or regular expression object or string
         :param value: a float to fix the matched values to. If the value is not specified,
             the parameter is fixed to the current value

@@ -227,7 +222,7 @@ class parameterised(object):
         Fixing a parameter which is tied to another, or constrained in some way will result in an error.
         To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
         """
-        matches = self.grep_param_names(which)
+        matches = self.grep_param_names(regexp)
         assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
         self.fixed_indices.append(matches)
         if value != None:
@@ -6,17 +6,20 @@ import numpy as np
 import pylab as pb
 from scipy.special import gammaln, digamma
 from ..util.linalg import pdinv
+from GPy.core.domains import REAL, POSITIVE
+import warnings

 class prior:
-    def pdf(self,x):
+    domain = None
+
+    def pdf(self, x):
         return np.exp(self.lnpdf(x))

     def plot(self):
         rvs = self.rvs(1000)
         pb.hist(rvs, 100, normed=True)
         xmin, xmax = pb.xlim()
         xx = np.linspace(xmin, xmax, 1000)
         pb.plot(xx, self.pdf(xx), 'r', linewidth=2)


 class Gaussian(prior):

@@ -29,24 +32,24 @@ class Gaussian(prior):
     .. Note:: Bishop 2006 notation is used throughout the code

     """
-    def __init__(self,mu,sigma):
+    domain = REAL
+
+    def __init__(self, mu, sigma):
         self.mu = float(mu)
         self.sigma = float(sigma)
         self.sigma2 = np.square(self.sigma)
         self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)

     def __str__(self):
         return "N(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'

     def lnpdf(self, x):
         return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2

     def lnpdf_grad(self, x):
         return -(x - self.mu) / self.sigma2

     def rvs(self, n):
         return np.random.randn(n) * self.sigma + self.mu


 class log_Gaussian(prior):

@@ -59,24 +62,24 @@ class log_Gaussian(prior):
     .. Note:: Bishop 2006 notation is used throughout the code

     """
-    def __init__(self,mu,sigma):
+    domain = POSITIVE
+
+    def __init__(self, mu, sigma):
         self.mu = float(mu)
         self.sigma = float(sigma)
         self.sigma2 = np.square(self.sigma)
         self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)

     def __str__(self):
         return "lnN(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'

     def lnpdf(self, x):
         return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)

     def lnpdf_grad(self, x):
         return -((np.log(x) - self.mu) / self.sigma2 + 1.) / x

     def rvs(self, n):
         return np.exp(np.random.randn(n) * self.sigma + self.mu)


 class multivariate_Gaussian:

@@ -89,47 +92,47 @@ class multivariate_Gaussian:
     .. Note:: Bishop 2006 notation is used throughout the code

     """
+    domain = REAL
+
-    def __init__(self,mu,var):
+    def __init__(self, mu, var):
         self.mu = np.array(mu).flatten()
         self.var = np.array(var)
         assert len(self.var.shape) == 2
         assert self.var.shape[0] == self.var.shape[1]
         assert self.var.shape[0] == self.mu.size
         self.D = self.mu.size
         self.inv, self.hld = pdinv(self.var)
         self.constant = -0.5 * self.D * np.log(2 * np.pi) - self.hld

     def summary(self):
         raise NotImplementedError

     def pdf(self, x):
         return np.exp(self.lnpdf(x))

     def lnpdf(self, x):
         d = x - self.mu
         return self.constant - 0.5 * np.sum(d * np.dot(d, self.inv), 1)

     def lnpdf_grad(self, x):
         d = x - self.mu
         return -np.dot(self.inv, d)

     def rvs(self, n):
         return np.random.multivariate_normal(self.mu, self.var, n)

     def plot(self):
         if self.D == 2:
             rvs = self.rvs(200)
             pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5)
             xmin, xmax = pb.xlim()
             ymin, ymax = pb.ylim()
             xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
             xflat = np.vstack((xx.flatten(), yy.flatten())).T
             zz = self.pdf(xflat).reshape(100, 100)
             pb.contour(xx, yy, zz, linewidths=2)


 def gamma_from_EV(E, V):
     """
     Creates an instance of a gamma prior by specifying the Expected value(s)
     and Variance(s) of the distribution.

@@ -138,10 +141,10 @@ def gamma_from_EV(E,V):
     :param V: variance

     """
+    warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
-    a = np.square(E)/V
-    b = E/V
-    return gamma(a,b)
+    a = np.square(E) / V
+    b = E / V
+    return gamma(a, b)

 class gamma(prior):
     """
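gamma_from_EV builds a Gamma prior from a desired mean E and variance V via shape a = E^2 / V and rate b = E / V; the added warning points users at a replacement constructor. A quick numerical check of that identity with SciPy (illustrative values only):

import numpy as np
from scipy import stats

E, V = 2.0, 0.5                  # desired mean and variance of the prior
a, b = E ** 2 / V, E / V         # shape a = 8, rate b = 4, as in gamma_from_EV

# scipy's gamma distribution is parameterised by shape and *scale* = 1/rate
g = stats.gamma(a, scale=1.0 / b)
print(g.mean())                  # 2.0, recovers E
print(g.var())                   # 0.5, recovers V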
@@ -153,33 +156,34 @@ class gamma(prior):
     .. Note:: Bishop 2006 notation is used throughout the code

     """
-    def __init__(self,a,b):
+    domain = POSITIVE
+
+    def __init__(self, a, b):
         self.a = float(a)
         self.b = float(b)
         self.constant = -gammaln(self.a) + a * np.log(b)

     def __str__(self):
         return "Ga(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'

     def summary(self):
         ret = {"E[x]": self.a / self.b, \
                "E[ln x]": digamma(self.a) - np.log(self.b), \
                "var[x]": self.a / self.b / self.b, \
                "Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
         if self.a > 1:
             ret['Mode'] = (self.a - 1.) / self.b
         else:
             ret['mode'] = np.nan
         return ret

     def lnpdf(self, x):
         return self.constant + (self.a - 1) * np.log(x) - self.b * x

     def lnpdf_grad(self, x):
         return (self.a - 1.) / x - self.b

     def rvs(self, n):
         return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)

 class inverse_gamma(prior):
     """

@@ -191,19 +195,20 @@ class inverse_gamma(prior):
     .. Note:: Bishop 2006 notation is used throughout the code

     """
-    def __init__(self,a,b):
+    domain = POSITIVE
+
+    def __init__(self, a, b):
         self.a = float(a)
         self.b = float(b)
         self.constant = -gammaln(self.a) + a * np.log(b)

     def __str__(self):
         return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'

     def lnpdf(self, x):
         return self.constant - (self.a + 1) * np.log(x) - self.b / x

     def lnpdf_grad(self, x):
         return -(self.a + 1.) / x + self.b / x ** 2

     def rvs(self, n):
         return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
@@ -4,13 +4,11 @@
 import numpy as np
 import pylab as pb
 from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv
-from ..util.plot import gpplot
-from .. import kern
-from GP import GP
 from scipy import linalg
 from ..likelihoods import Gaussian
+from gp_base import GPBase

-class sparse_GP(GP):
+class sparse_GP(GPBase):
     """
     Variational sparse GP model

@@ -31,6 +29,8 @@ class sparse_GP(GP):
     """

     def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
+        GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
+
         self.Z = Z
         self.M = Z.shape[0]
         self.likelihood = likelihood

@@ -42,13 +42,13 @@ class sparse_GP(GP):
             self.has_uncertain_inputs = True
             self.X_variance = X_variance

-        GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
+        if normalize_X:
+            self.Z = (self.Z.copy() - self._Xmean) / self._Xstd

         # normalize X uncertainty also
         if self.has_uncertain_inputs:
             self.X_variance /= np.square(self._Xstd)

     def _compute_kernel_matrices(self):
         # kernel computations, using BGPLVM notation
         self.Kmm = self.kern.K(self.Z)

@@ -139,8 +139,6 @@ class sparse_GP(GP):
             self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
             self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
-
-
     def log_likelihood(self):
         """ Compute the (lower bound on the) log marginal likelihood """
         if self.likelihood.is_heteroscedastic:

@@ -161,10 +159,11 @@ class sparse_GP(GP):
         self._computations()

     def _get_params(self):
-        return np.hstack([self.Z.flatten(), GP._get_params(self)])
+        return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()])

     def _get_param_names(self):
-        return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) + GP._get_param_names(self)
+        return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], []) \
+            + self.kern._get_param_names_transformed() + self.likelihood._get_param_names()

     def update_likelihood_approximation(self):
         """

@@ -282,3 +281,17 @@ class sparse_GP(GP):

         return mean, var, _025pm, _975pm

+    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20):
+        GPBase.plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20)
+        if self.X.shape[1] == 1:
+            Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
+            if self.has_uncertain_inputs:
+                pb.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
+                            xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
+                            ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+            Zu = self.Z * self._Xstd + self._Xmean
+            pb.plot(Zu, Zu * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
+            # pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
+
+        elif self.X.shape[1] == 2: # FIXME
+            pb.plot(self.Z[:, 0], self.Z[:, 1], 'wo')
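After this refactor, sparse_GP assembles its own parameter vector rather than delegating to GP: the flattened inducing inputs Z come first, then the transformed kernel parameters, then the likelihood parameters, with one 'iip_i_j' name per inducing-input coordinate. A small standalone sketch of that layout; the shapes and parameter names below are toy values, not GPy output:

import numpy as np

M, Q = 3, 2                                        # number of inducing points and input dims
Z = np.arange(M * Q, dtype=float).reshape(M, Q)    # toy inducing inputs
kern_params = np.array([1.0, 0.5])                 # e.g. an rbf variance and lengthscale (toy)
lik_params = np.array([0.01])                      # e.g. a noise variance (toy)

params = np.hstack([Z.flatten(), kern_params, lik_params])

names = sum([['iip_%i_%i' % (i, j) for j in range(Q)] for i in range(M)], []) \
    + ['rbf_variance', 'rbf_lengthscale'] + ['noise_variance']

assert len(params) == len(names)                   # one name per entry: M*Q + 2 + 1 = 9
print(list(zip(names, params))[:3])                # [('iip_0_0', 0.0), ('iip_0_1', 1.0), ('iip_1_0', 2.0)]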
@@ -3,11 +3,10 @@

 import numpy as np
+from GPy.core.domains import POSITIVE, NEGATIVE, BOUNDED

 class transformation(object):
-    def __init__(self):
-        # set the domain. Suggest we use 'positive', 'bounded', etc
-        self.domain = 'undefined'
+    domain = None
     def f(self, x):
         raise NotImplementedError

@@ -24,8 +23,7 @@ class transformation(object):
         raise NotImplementedError

 class logexp(transformation):
-    def __init__(self):
-        self.domain = 'positive'
+    domain = POSITIVE
     def f(self, x):
         return np.log(1. + np.exp(x))
     def finv(self, f):

@@ -43,8 +41,8 @@ class logexp_clipped(transformation):
     min_bound = 1e-10
     log_max_bound = np.log(max_bound)
     log_min_bound = np.log(min_bound)
+    domain = POSITIVE
     def __init__(self, lower=1e-6):
-        self.domain = 'positive'
         self.lower = lower
     def f(self, x):
         exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))

@@ -66,8 +64,7 @@ class logexp_clipped(transformation):
         return '(+ve_c)'

 class exponent(transformation):
-    def __init__(self):
-        self.domain = 'positive'
+    domain = POSITIVE
     def f(self, x):
         return np.exp(x)
     def finv(self, x):

@@ -82,8 +79,7 @@ class exponent(transformation):
         return '(+ve)'

 class negative_exponent(transformation):
-    def __init__(self):
-        self.domain = 'negative'
+    domain = NEGATIVE
     def f(self, x):
         return -np.exp(x)
     def finv(self, x):

@@ -98,8 +94,7 @@ class negative_exponent(transformation):
         return '(-ve)'

 class square(transformation):
-    def __init__(self):
-        self.domain = 'positive'
+    domain = POSITIVE
     def f(self, x):
         return x ** 2
     def finv(self, x):

@@ -112,8 +107,8 @@ class square(transformation):
         return '(+sq)'

 class logistic(transformation):
+    domain = BOUNDED
     def __init__(self, lower, upper):
-        self.domain = 'bounded'
         assert lower < upper
         self.lower, self.upper = float(lower), float(upper)
         self.difference = self.upper - self.lower
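These transformation classes are what constrain_positive, constrain_negative and constrain_bounded attach to parameters: the optimizer works on an unconstrained value x while the model sees f(x) in the required domain. For example, logexp uses the softplus f(x) = log(1 + exp(x)) > 0 with inverse finv(y) = log(exp(y) - 1). A standalone numerical check, mirroring the structure above rather than importing GPy:

import numpy as np

def f(x):
    # softplus: maps any real x to a strictly positive value
    return np.log(1. + np.exp(x))

def finv(y):
    # inverse softplus: recovers the unconstrained value
    return np.log(np.exp(y) - 1.)

x = np.array([-3.0, 0.0, 2.5])
y = f(x)
assert np.all(y > 0)               # positivity is guaranteed for any x
assert np.allclose(finv(y), x)     # the round trip recovers x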
@ -21,13 +21,15 @@ def crescent_data(seed=default_seed): # FIXME
|
||||||
"""
|
"""
|
||||||
|
|
||||||
data = GPy.util.datasets.crescent_data(seed=seed)
|
data = GPy.util.datasets.crescent_data(seed=seed)
|
||||||
|
Y = data['Y']
|
||||||
|
Y[Y.flatten()==-1] = 0
|
||||||
|
|
||||||
# Kernel object
|
# Kernel object
|
||||||
kernel = GPy.kern.rbf(data['X'].shape[1])
|
kernel = GPy.kern.rbf(data['X'].shape[1])
|
||||||
|
|
||||||
# Likelihood object
|
# Likelihood object
|
||||||
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
-    likelihood = GPy.likelihoods.EP(data['Y'], distribution)
+    likelihood = GPy.likelihoods.EP(Y, distribution)

     m = GPy.models.GP(data['X'], likelihood, kernel)

@@ -49,12 +51,15 @@ def oil():
     Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
     """
     data = GPy.util.datasets.oil()
+    Y = data['Y'][:, 0:1]
+    Y[Y.flatten()==-1] = 0

     # Kernel object
     kernel = GPy.kern.rbf(12)

     # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1], distribution)
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
+    likelihood = GPy.likelihoods.EP(Y, distribution)

     # Create GP model
     m = GPy.models.GP(data['X'], likelihood=likelihood, kernel=kernel)

@@ -79,12 +84,14 @@ def toy_linear_1d_classification(seed=default_seed):
     data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
+    Y[Y.flatten() == -1] = 0

     # Kernel object
     kernel = GPy.kern.rbf(1)

     # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    link = GPy.likelihoods.link_functions.probit
+    distribution = GPy.likelihoods.likelihood_functions.binomial(link)
     likelihood = GPy.likelihoods.EP(Y, distribution)

     # Model definition

@@ -115,12 +122,13 @@ def sparse_toy_linear_1d_classification(seed=default_seed):
     data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
     Y = data['Y'][:, 0:1]
+    Y[Y.flatten() == -1] = 0

     # Kernel object
     kernel = GPy.kern.rbf(1) + GPy.kern.white(1)

     # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
     likelihood = GPy.likelihoods.EP(Y, distribution)

     Z = np.random.uniform(data['X'].min(), data['X'].max(), (10, 1))

@@ -156,13 +164,15 @@ def sparse_crescent_data(inducing=10, seed=default_seed):
     """
     data = GPy.util.datasets.crescent_data(seed=seed)
+    Y = data['Y']
+    Y[Y.flatten()==-1]=0

     # Kernel object
     kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])

     # Likelihood object
-    distribution = GPy.likelihoods.likelihood_functions.probit()
-    likelihood = GPy.likelihoods.EP(data['Y'], distribution)
+    distribution = GPy.likelihoods.likelihood_functions.binomial()
+    likelihood = GPy.likelihoods.EP(Y, distribution)

     sample = np.random.randint(0, data['X'].shape[0], inducing)
     Z = data['X'][sample, :]
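Taken together, the classification demos above now share one pattern. The sketch below is not part of the diff; it assumes the GPy API of this commit (likelihood_functions.binomial, link_functions.probit, likelihoods.EP, models.GP) and shows that pattern on synthetic labels.

    import numpy as np
    import GPy

    # toy binary labels; the bundled datasets use {-1,1}, so the demos map them
    # to {0,1} first, and binomial._preprocess_values remaps 0 -> -1 internally
    X = np.random.uniform(-3., 3., (40, 1))
    Y = (np.sin(X) > 0.).astype(float)

    kernel = GPy.kern.rbf(1)
    link = GPy.likelihoods.link_functions.probit            # analytical EP updates
    distribution = GPy.likelihoods.likelihood_functions.binomial(link)
    likelihood = GPy.likelihoods.EP(Y, distribution)

    m = GPy.models.GP(X, likelihood=likelihood, kernel=kernel)
    m.ensure_default_constraints()
    m.update_likelihood_approximation()                     # run EP
    m.optimize()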
@@ -7,6 +7,7 @@ from matplotlib import pyplot as plt
 import GPy
 from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
 from GPy.util.datasets import swiss_roll_generated
+from GPy.core.transformations import logexp

 default_seed = np.random.seed(123344)

@@ -17,11 +18,11 @@ def BGPLVM(seed=default_seed):
     D = 4
     # generate GPLVM-like data
     X = np.random.rand(N, Q)
     k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
     K = k.K(X)
     Y = np.random.multivariate_normal(np.zeros(N), K, D).T

     k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q)
     # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
     # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
     # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
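The data generation in the BGPLVM demo above is the usual "sample a GP at random latent points" recipe. As a standalone sketch (same calls as in the diff, only the sizes are illustrative):

    import numpy as np
    import GPy

    N, Q, D = 100, 3, 4
    X = np.random.rand(N, Q)                            # latent inputs
    k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)    # smooth kernel plus jitter
    K = k.K(X)                                          # N x N latent covariance
    # each of the D output dimensions is an independent draw from N(0, K)
    Y = np.random.multivariate_normal(np.zeros(N), K, D).T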
@@ -187,8 +188,8 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
     Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
     Y3 = S3.dot(np.random.randn(S3.shape[1], D3))

-    Y1 += .1 * np.random.randn(*Y1.shape)
-    Y2 += .1 * np.random.randn(*Y2.shape)
+    Y1 += .3 * np.random.randn(*Y1.shape)
+    Y2 += .2 * np.random.randn(*Y2.shape)
     Y3 += .1 * np.random.randn(*Y3.shape)

     Y1 -= Y1.mean(0)

@@ -262,13 +263,13 @@ def bgplvm_simulation(optimize='scg',
     # m.constrain('variance|noise', logexp_clipped())
     m.ensure_default_constraints()
     m['noise'] = Y.var() / 100.
-    m['linear_variance'] = .01
+    m['linear_variance'] = .001

     if optimize:
         print "Optimizing model:"
-        m.optimize('bfgs', max_iters=max_f_eval,
+        m.optimize('scg', max_iters=max_f_eval,
                    max_f_eval=max_f_eval,
-                   messages=True, gtol=1e-2)
+                   messages=True, gtol=1e-6)
     if plot:
         import pylab
         m.plot_X_1d()

@@ -277,23 +278,21 @@ def bgplvm_simulation(optimize='scg',
         m.kern.plot_ARD()
     return m

-def mrd_simulation(optimize=True, plot_sim=False, **kw):
-    D1, D2, D3, N, M, Q = 150, 200, 400, 300, 3, 7
+def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
+    D1, D2, D3, N, M, Q = 150, 200, 400, 500, 3, 7
     slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)

     from GPy.models import mrd
     from GPy import kern
-    from GPy.core.transformations import logexp_clipped

     reload(mrd); reload(kern)

-    k = kern.linear(Q, [0.05] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
-    m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="concat", initz='permute', **kw)
+    k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
+    m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="", initz='permute', **kw)

     for i, Y in enumerate(Ylist):
         m['{}_noise'.format(i + 1)] = Y.var() / 100.

-    # m.constrain('variance|noise', logexp_clipped(1e-6))
     m.ensure_default_constraints()

     # DEBUG

@@ -301,8 +300,10 @@ def mrd_simulation(optimize=True, plot_sim=False, **kw):
     if optimize:
         print "Optimizing Model:"
-        m.optimize('bfgs', messages=1, max_iters=3e3)
+        m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4)
+    if plot:
+        m.plot_X_1d()
+        m.plot_scales()
     return m

 def brendan_faces():

@@ -323,7 +324,7 @@ def brendan_faces():
     m.ensure_default_constraints()
     m.optimize('scg', messages=1, max_f_eval=10000)

-    ax = m.plot_latent(which_indices=(0,1))
+    ax = m.plot_latent(which_indices=(0, 1))
     y = m.likelihood.Y[0, :]
     data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
     lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
@@ -10,7 +10,7 @@ import numpy as np
 import GPy


-def toy_rbf_1d():
+def toy_rbf_1d(max_nb_eval_optim=100):
     """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
     data = GPy.util.datasets.toy_rbf_1d()

@@ -19,13 +19,13 @@ def toy_rbf_1d():
     # optimize
     m.ensure_default_constraints()
-    m.optimize()
+    m.optimize(max_f_eval=max_nb_eval_optim)
     # plot
     m.plot()
     print(m)
     return m

-def rogers_girolami_olympics():
+def rogers_girolami_olympics(max_nb_eval_optim=100):
     """Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
     data = GPy.util.datasets.rogers_girolami_olympics()

@@ -34,14 +34,14 @@ def rogers_girolami_olympics():
     # optimize
     m.ensure_default_constraints()
-    m.optimize()
+    m.optimize(max_f_eval=max_nb_eval_optim)

     # plot
     m.plot(plot_limits = (1850, 2050))
     print(m)
     return m

-def toy_rbf_1d_50():
+def toy_rbf_1d_50(max_nb_eval_optim=100):
     """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
     data = GPy.util.datasets.toy_rbf_1d_50()

@@ -50,14 +50,14 @@ def toy_rbf_1d_50():
     # optimize
     m.ensure_default_constraints()
-    m.optimize()
+    m.optimize(max_f_eval=max_nb_eval_optim)

     # plot
     m.plot()
     print(m)
     return m

-def silhouette():
+def silhouette(max_nb_eval_optim=100):
     """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
     data = GPy.util.datasets.silhouette()

@@ -66,12 +66,12 @@ def silhouette():
     # optimize
     m.ensure_default_constraints()
-    m.optimize(messages=True)
+    m.optimize(messages=True,max_f_eval=max_nb_eval_optim)

     print(m)
     return m

-def coregionalisation_toy2():
+def coregionalisation_toy2(max_nb_eval_optim=100):
     """
     A simple demonstration of coregionalisation on two sinusoidal functions.
     """

@@ -90,8 +90,7 @@ def coregionalisation_toy2():
     m.constrain_fixed('rbf_var',1.)
     m.constrain_positive('kappa')
     m.ensure_default_constraints()
-    m.optimize('sim',max_f_eval=5000,messages=1)
-    #m.optimize()
+    m.optimize('sim',messages=1,max_f_eval=max_nb_eval_optim)

     pb.figure()
     Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))

@@ -104,7 +103,7 @@ def coregionalisation_toy2():
     pb.plot(X2[:,0],Y2[:,0],'gx',mew=2)
     return m

-def coregionalisation_toy():
+def coregionalisation_toy(max_nb_eval_optim=100):
     """
     A simple demonstration of coregionalisation on two sinusoidal functions.
     """

@@ -123,7 +122,7 @@ def coregionalisation_toy():
     m.constrain_fixed('rbf_var',1.)
     m.constrain_positive('kappa')
     m.ensure_default_constraints()
-    m.optimize()
+    m.optimize(max_f_eval=max_nb_eval_optim)

     pb.figure()
     Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))

@@ -137,7 +136,7 @@ def coregionalisation_toy():
     return m


-def coregionalisation_sparse():
+def coregionalisation_sparse(max_nb_eval_optim=100):
     """
     A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations.
     """

@@ -162,7 +161,7 @@ def coregionalisation_sparse():
     m.constrain_positive('kappa')
     m.constrain_fixed('iip')
     m.ensure_default_constraints()
-    m.optimize_restarts(5,robust=True,messages=1)
+    m.optimize_restarts(5, robust=True, messages=1, max_f_eval=max_nb_eval_optim)

     pb.figure()
     Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1))))

@@ -179,7 +178,7 @@ def coregionalisation_sparse():
     return m


-def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000):
+def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, max_nb_eval_optim=100):
     """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher."""

     # Contour over a range of length scales and signal/noise ratios.
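All of the regression demos above gain the same max_nb_eval_optim knob, which is simply forwarded to m.optimize(max_f_eval=...). A usage sketch, assuming these demos live in GPy.examples.regression (the module path itself is not shown in the diff):

    import GPy

    # cap the optimiser at 20 function evaluations instead of the default 100
    m = GPy.examples.regression.toy_rbf_1d(max_nb_eval_optim=20)
    print(m)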
@@ -217,7 +216,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000
     # optimize
     m.ensure_default_constraints()
-    m.optimize(xtol=1e-6,ftol=1e-6)
+    m.optimize(xtol=1e-6, ftol=1e-6, max_f_eval=max_nb_eval_optim)

     optim_point_x[1] = m.get('rbf_lengthscale')
     optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));

@@ -264,7 +263,7 @@ def _contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf
         lls.append(length_scale_lls)
     return np.array(lls)

-def sparse_GP_regression_1D(N = 400, M = 5):
+def sparse_GP_regression_1D(N = 400, M = 5, max_nb_eval_optim=100):
     """Run a 1D example of a sparse GP regression."""
     # sample inputs and outputs
     X = np.random.uniform(-3.,3.,(N,1))

@@ -279,11 +278,11 @@ def sparse_GP_regression_1D(N = 400, M = 5):
     m.constrain_positive('(variance|lengthscale|precision)')

     m.checkgrad(verbose=1)
-    m.optimize('tnc', messages = 1)
+    m.optimize('tnc', messages = 1, max_f_eval=max_nb_eval_optim)
     m.plot()
     return m

-def sparse_GP_regression_2D(N = 400, M = 50):
+def sparse_GP_regression_2D(N = 400, M = 50, max_nb_eval_optim=100):
     """Run a 2D example of a sparse GP regression."""
     X = np.random.uniform(-3.,3.,(N,2))
     Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05

@@ -294,7 +293,7 @@ def sparse_GP_regression_2D(N = 400, M = 50):
     kernel = rbf + noise

     # create simple GP model
-    m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M)
+    m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M, max_nb_eval_optim=100)

     # contrain all parameters to be positive (but not inducing inputs)
     m.constrain_positive('(variance|lengthscale|precision)')

@@ -304,12 +303,12 @@ def sparse_GP_regression_2D(N = 400, M = 50):
     # optimize and plot
     pb.figure()
-    m.optimize('tnc', messages = 1)
+    m.optimize('tnc', messages = 1, max_f_eval=max_nb_eval_optim)
     m.plot()
     print(m)
     return m

-def uncertain_inputs_sparse_regression():
+def uncertain_inputs_sparse_regression(max_nb_eval_optim=100):
     """Run a 1D example of a sparse GP regression with uncertain inputs."""
     # sample inputs and outputs
     S = np.ones((20,1))

@@ -327,7 +326,7 @@ def uncertain_inputs_sparse_regression():
     m.constrain_positive('(variance|prec)')

     # optimize and plot
-    m.optimize('tnc', max_f_eval = 1000, messages=1)
+    m.optimize('tnc', messages=1, max_f_eval=max_nb_eval_optim)
     m.plot()
     print(m)
     return m

@@ -52,7 +52,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
         ftol = 1e-6
     if gtol is None:
         gtol = 1e-5
-    sigma0 = 1.0e-4
+    sigma0 = 1.0e-8
     fold = f(x, *optargs) # Initial function value.
     function_eval = 1
     fnow = fold

@@ -20,6 +20,7 @@ class EP(likelihood):
         self.N, self.D = self.data.shape
         self.is_heteroscedastic = True
         self.Nparams = 0
+        self._transf_data = self.likelihood_function._preprocess_values(data)

         #Initial values - Likelihood approximation parameters:
         #p(y|f) = t(f|tau_tilde,v_tilde)

@@ -51,11 +51,15 @@ class Gaussian(likelihood):
         return ["noise_variance"]

     def _set_params(self, x):
-        x = float(x)
+        x = np.float64(x)
         if self._variance != x:
-            self.precision = 1. / x
+            if x == 0.:
+                self.precision = None
+                self.V = None
+            else:
+                self.precision = 1. / x
+                self.V = (self.precision) * self.Y
             self.covariance_matrix = np.eye(self.N) * x
-            self.V = (self.precision) * self.Y
             self._variance = x

     def predictive_values(self, mu, var, full_cov):
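The Gaussian-likelihood change above guards the precision against a noise variance of exactly zero. A minimal standalone sketch of that guard (a hypothetical free-standing helper mirroring the new _set_params body, not code from the diff):

    import numpy as np

    def set_noise_variance(lik, x):
        """Skip 1/x and V = (1/x) * Y when the noise variance x is exactly zero."""
        x = np.float64(x)
        if lik._variance != x:
            if x == 0.:
                lik.precision = None
                lik.V = None
            else:
                lik.precision = 1. / x
                lik.V = lik.precision * lik.Y
            lik.covariance_matrix = np.eye(lik.N) * x
            lik._variance = x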
@@ -8,19 +8,68 @@ import scipy as sp
 import pylab as pb
 from ..util.plot import gpplot
 from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+import link_functions

-class likelihood_function:
+class likelihood_function(object):
     """
     Likelihood class for doing Expectation propagation

     :param Y: observed output (Nx1 numpy.darray)
     ..Note:: Y values allowed depend on the likelihood_function used
     """
-    def __init__(self,location=0,scale=1):
-        self.location = location
-        self.scale = scale
+    def __init__(self,link):
+        if link == self._analytical:
+            self.moments_match = self._moments_match_analytical
+        else:
+            assert isinstance(link,link_functions.link_function)
+            self.link = link
+            self.moments_match = self._moments_match_numerical

-class probit(likelihood_function):
+    def _preprocess_values(self,Y):
+        return Y
+
+    def _product(self,gp,obs,mu,sigma):
+        return stats.norm.pdf(gp,loc=mu,scale=sigma) * self._distribution(gp,obs)
+
+    def _nlog_product(self,gp,obs,mu,sigma):
+        return -(-.5*(gp-mu)**2/sigma**2 + self._log_distribution(gp,obs))
+
+    def _locate(self,obs,mu,sigma):
+        """
+        Golden Search to find the mode in the _product function (cavity x exact likelihood) and define a grid around it for numerical integration
+        """
+        golden_A = -1 if obs == 0 else np.array([np.log(obs),mu]).min() #Lower limit
+        golden_B = np.array([np.log(obs),mu]).max() #Upper limit
+        return sp.optimize.golden(self._nlog_product, args=(obs,mu,sigma), brack=(golden_A,golden_B)) #Better to work with _nlog_product than with _product
+
+    def _moments_match_numerical(self,obs,tau,v):
+        """
+        Simpson's Rule is used to calculate the moments numerically, it needs a grid of points as input.
+        """
+        mu = v/tau
+        sigma = np.sqrt(1./tau)
+        opt = self._locate(obs,mu,sigma)
+        width = 3./np.log(max(obs,2))
+        A = opt - width #Grid's lower limit
+        B = opt + width #Grid's upper limit
+        K = 10*int(np.log(max(obs,150))) #Number of points in the grid
+        h = (B-A)/K # length of the intervals
+        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
+        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
+        _aux1 = self._product(A,obs,mu,sigma)
+        _aux2 = self._product(B,obs,mu,sigma)
+        _aux3 = 4*self._product(grid_x[range(1,K,2)],obs,mu,sigma)
+        _aux4 = 2*self._product(grid_x[range(2,K-1,2)],obs,mu,sigma)
+        zeroth = np.hstack((_aux1,_aux2,_aux3,_aux4)) # grid of points (Y axis) rearranged
+        first = zeroth*x
+        second = first*x
+        Z_hat = sum(zeroth)*h/3 # Zero-th moment
+        mu_hat = sum(first)*h/(3*Z_hat) # First moment
+        m2 = sum(second)*h/(3*Z_hat) # Second moment
+        sigma2_hat = m2 - mu_hat**2 # Second central moment
+        return float(Z_hat), float(mu_hat), float(sigma2_hat)
+
+class binomial(likelihood_function):
     """
     Probit likelihood
     Y is expected to take values in {-1,1}

@@ -29,8 +78,33 @@
         L(x) = \Phi (Y_i*f_i)
         $$
     """
+    def __init__(self,link=None):
+        self._analytical = link_functions.probit
+        if not link:
+            link = self._analytical
+        super(binomial, self).__init__(link)
+
+    def _distribution(self,gp,obs):
+        pass
+
+    def _log_distribution(self,gp,obs):
+        pass
+
+    def _preprocess_values(self,Y):
+        """
+        Check if the values of the observations correspond to the values
+        assumed by the likelihood function.

+        ..Note:: Binary classification algorithm works better with classes {-1,1}
+        """
+        Y_prep = Y.copy()
+        Y1 = Y[Y.flatten()==1].size
+        Y2 = Y[Y.flatten()==0].size
+        assert Y1 + Y2 == Y.size, 'Binomial likelihood is meant to be used only with outputs in {0,1}.'
+        Y_prep[Y.flatten() == 0] = -1
+        return Y_prep
+
-    def moments_match(self,data_i,tau_i,v_i):
+    def _moments_match_analytical(self,data_i,tau_i,v_i):
         """
         Moments match of the marginal approximation in EP algorithm

@@ -38,8 +112,6 @@
         :param tau_i: precision of the cavity distribution (float)
         :param v_i: mean/variance of the cavity distribution (float)
         """
-        #if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
-        # TODO: some version of assert
         z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
         Z_hat = std_norm_cdf(z)
         phi = std_norm_pdf(z)

@@ -50,6 +122,8 @@
     def predictive_values(self,mu,var):
         """
         Compute mean, variance and conficence interval (percentiles 5 and 95) of the prediction
+        :param mu: mean of the latent variable
+        :param var: variance of the latent variable
         """
         mu = mu.flatten()
         var = var.flatten()

@@ -69,68 +143,23 @@ class Poisson(likelihood_function):
         L(x) = \exp(\lambda) * \lambda**Y_i / Y_i!
         $$
     """
-    def moments_match(self,data_i,tau_i,v_i):
-        """
-        Moments match of the marginal approximation in EP algorithm
-        :param i: number of observation (int)
-        :param tau_i: precision of the cavity distribution (float)
-        :param v_i: mean/variance of the cavity distribution (float)
-        """
-        mu = v_i/tau_i
-        sigma = np.sqrt(1./tau_i)
-        [the remainder of the old Poisson-specific moments_match is deleted here: the
-         local poisson_norm/log_pnm helpers and the golden-search plus Simpson's-rule
-         integration, which now live in the likelihood_function base class above]
+    def __init__(self,link=None):
+        self._analytical = None
+        if not link:
+            link = link_functions.log()
+        super(Poisson, self).__init__(link)
+
+    def _distribution(self,gp,obs):
+        return stats.poisson.pmf(obs,self.link.inv_transf(gp))
+
+    def _log_distribution(self,gp,obs):
+        return - self.link.inv_transf(gp) + obs * self.link.log_inv_transf(gp)

     def predictive_values(self,mu,var):
         """
         Compute mean, and conficence interval (percentiles 5 and 95) of the prediction
         """
-        mean = np.exp(mu*self.scale + self.location)
+        mean = self.link.transf(mu)#np.exp(mu*self.scale + self.location)
         tmp = stats.poisson.ppf(np.array([.025,.975]),mean)
         p_025 = tmp[:,0]
         p_975 = tmp[:,1]
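The numerical branch above (golden search for the mode, then Simpson's rule for the zeroth, first and second moments) can be checked in isolation. A small sketch, not GPy code, that applies the same Simpson weights to a density whose moments are known:

    import numpy as np

    def simpson_moments(f, a, b, n=200):
        """Zeroth, first and second central moments of an unnormalised density f
        on [a, b] via Simpson's rule, mirroring _moments_match_numerical above."""
        n += n % 2                               # Simpson needs an even interval count
        x = np.linspace(a, b, n + 1)
        w = np.ones(n + 1); w[1:-1:2] = 4.; w[2:-1:2] = 2.
        h = (b - a) / n
        fx = f(x)
        Z = np.sum(w * fx) * h / 3.              # zeroth moment (normaliser)
        mu = np.sum(w * fx * x) * h / (3. * Z)   # first moment
        m2 = np.sum(w * fx * x**2) * h / (3. * Z)
        return Z, mu, m2 - mu**2                 # Z_hat, mu_hat, sigma2_hat

    # sanity check on a standard normal: expect Z ~ 1, mean ~ 0, variance ~ 1
    pdf = lambda x: np.exp(-0.5 * x**2) / np.sqrt(2 * np.pi)
    print(simpson_moments(pdf, -8., 8.))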
58  GPy/likelihoods/link_functions.py  (new file)
@@ -0,0 +1,58 @@
+# Copyright (c) 2012, 2013 Ricardo Andrade
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+from scipy import stats
+import scipy as sp
+import pylab as pb
+from ..util.plot import gpplot
+from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf
+
+class link_function(object):
+    """
+    Link function class for doing non-Gaussian likelihoods approximation
+
+    :param Y: observed output (Nx1 numpy.darray)
+    ..Note:: Y values allowed depend on the likelihood_function used
+    """
+    def __init__(self):
+        pass
+
+
+class identity(link_function):
+    def transf(self,mu):
+        return mu
+
+    def inv_transf(self,f):
+        return f
+
+    def log_inv_transf(self,f):
+        return np.log(f)
+
+class log(link_function):
+
+    def transf(self,mu):
+        return np.log(mu)
+
+    def inv_transf(self,f):
+        return np.exp(f)
+
+    def log_inv_transf(self,f):
+        return f
+
+class log_ex_1(link_function):
+    def transf(self,mu):
+        return np.log(np.exp(mu) - 1)
+
+    def inv_transf(self,f):
+        return np.log(np.exp(f)+1)
+
+    def log_inv_tranf(self,f):
+        return np.log(np.log(np.exp(f)+1))
+
+class probit(link_function):
+    pass
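As a quick sanity check of the new link classes (assuming they are importable as GPy.likelihoods.link_functions): for the log link, inv_transf is exp, transf is log, and log_inv_transf is the identity, which is exactly what the Poisson likelihood above relies on.

    import numpy as np
    from GPy.likelihoods import link_functions

    link = link_functions.log()
    f = np.array([-1., 0., 2.])
    rate = link.inv_transf(f)              # exp(f): positive Poisson rates
    assert np.allclose(link.transf(rate), f)
    assert np.allclose(link.log_inv_transf(f), f)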
@@ -5,7 +5,7 @@ import numpy as np
 import pylab as pb
 import sys, pdb
 from GPLVM import GPLVM
-from sparse_GP import sparse_GP
+from ..core import sparse_GP
 from GPy.util.linalg import pdinv
 from ..likelihoods import Gaussian
 from .. import kern

@@ -65,6 +65,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
         self._savedABCD = []

         sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs)
+        self._set_params(self._get_params())

     @property
     def oldps(self):

@@ -96,7 +97,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):

     def _clipped(self, x):
         return x # np.clip(x, -1e300, 1e300)

     def _set_params(self, x, save_old=True, save_count=0):
         # try:
         x = self._clipped(x)

@@ -7,7 +7,7 @@ from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv
 from ..util.plot import gpplot
 from .. import kern
 from scipy import stats, linalg
-from sparse_GP import sparse_GP
+from ..core import sparse_GP

 def backsub_both_sides(L,X):
     """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""

@@ -16,6 +16,9 @@ def backsub_both_sides(L,X):

 class FITC(sparse_GP):

+    def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
+        super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X)
+
     def update_likelihood_approximation(self):
         """
         Approximates a non-gaussian likelihood using Expectation Propagation
288  GPy/models/GP.py  (deleted)
@@ -1,288 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-import numpy as np
-from scipy import linalg
-import pylab as pb
-from .. import kern
-from ..core import model
-from ..util.linalg import pdinv, mdot, tdot
-from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
-from ..likelihoods import EP
-
-class GP(model):
-    def __init__(self, X, likelihood, kernel, normalize_X=False): ...
-    def dL_dZ(self): ...
-    def _set_params(self, p): ...
-    def _get_params(self): ...
-    def _get_param_names(self): ...
-    def update_likelihood_approximation(self): ...
-    def _model_fit_term(self): ...
-    def log_likelihood(self): ...
-    def _log_likelihood_gradients(self): ...
-    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False): ...
-    def predict(self, Xnew, which_parts='all', full_cov=False): ...
-    def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False): ...
-    def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20): ...
(The entire 288-line GPy/models/GP.py module is removed in this commit; only its imports and method signatures are listed above. The modules that used it now import the GP class from GPy.core instead, as the import changes below show.)
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from ..core import model
|
from ..core import model
|
||||||
from ..util.linalg import pdinv, PCA
|
from ..util.linalg import pdinv, PCA
|
||||||
from GP import GP
|
from ..core import GP
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
from .. import util
|
from .. import util
|
||||||
from GPy.util import plot_latent
|
from GPy.util import plot_latent
|
||||||
|
|
@ -26,13 +26,14 @@ class GPLVM(GP):
|
||||||
:type init: 'PCA'|'random'
|
:type init: 'PCA'|'random'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, Y, Q, init='PCA', X = None, kernel=None, normalize_Y=False, **kwargs):
|
def __init__(self, Y, Q, init='PCA', X = None, kernel=None, normalize_Y=False):
|
||||||
if X is None:
|
if X is None:
|
||||||
X = self.initialise_latent(init, Q, Y)
|
X = self.initialise_latent(init, Q, Y)
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(Q, ARD=Q>1) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
|
kernel = kern.rbf(Q, ARD=Q>1) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
|
||||||
likelihood = Gaussian(Y, normalize=normalize_Y)
|
likelihood = Gaussian(Y, normalize=normalize_Y)
|
||||||
GP.__init__(self, X, likelihood, kernel, **kwargs)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=False)
|
||||||
|
self._set_params(self._get_params())
|
||||||
|
|
||||||
def initialise_latent(self, init, Q, Y):
|
def initialise_latent(self, init, Q, Y):
|
||||||
if init == 'PCA':
|
if init == 'PCA':
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from GP import GP
|
from ..core import GP
|
||||||
from .. import likelihoods
|
from .. import likelihoods
|
||||||
from .. import kern
|
from .. import kern
|
||||||
|
|
||||||
|
|
@ -32,3 +32,4 @@ class GP_regression(GP):
|
||||||
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
|
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
|
||||||
|
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
|
self._set_params(self._get_params())
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,9 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
from GP import GP
|
#from GP import GP
|
||||||
|
#from sparse_GP import sparse_GP
|
||||||
from GP_regression import GP_regression
|
from GP_regression import GP_regression
|
||||||
from sparse_GP import sparse_GP
|
|
||||||
from sparse_GP_regression import sparse_GP_regression
|
from sparse_GP_regression import sparse_GP_regression
|
||||||
from GPLVM import GPLVM
|
from GPLVM import GPLVM
|
||||||
from warped_GP import warpedGP
|
from warped_GP import warpedGP
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
|
||||||
from ..util.plot import gpplot
|
from ..util.plot import gpplot
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from scipy import stats, linalg
|
from scipy import stats, linalg
|
||||||
from sparse_GP import sparse_GP
|
from ..core import sparse_GP
|
||||||
|
|
||||||
def backsub_both_sides(L,X):
|
def backsub_both_sides(L,X):
|
||||||
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
|
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
|
||||||
|
|
@ -36,12 +36,12 @@ class generalized_FITC(sparse_GP):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
|
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
|
||||||
|
|
||||||
self.Z = Z
|
self.Z = Z
|
||||||
self.M = self.Z.shape[0]
|
self.M = self.Z.shape[0]
|
||||||
self.true_precision = likelihood.precision
|
self.true_precision = likelihood.precision
|
||||||
|
|
||||||
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
|
super(generalized_FITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X)
|
||||||
|
self._set_params(self._get_params())
|
||||||
|
|
||||||
def _set_params(self, p):
|
def _set_params(self, p):
|
||||||
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
|
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ Created on 10 Apr 2013
|
||||||
'''
|
'''
|
||||||
from GPy.core import model
|
from GPy.core import model
|
||||||
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
|
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
|
||||||
from GPy.models.sparse_GP import sparse_GP
|
from GPy.core import sparse_GP
|
||||||
from GPy.util.linalg import PCA
|
from GPy.util.linalg import PCA
|
||||||
from scipy import linalg
|
from scipy import linalg
|
||||||
import numpy
|
import numpy
|
||||||
|
|
@ -23,7 +23,7 @@ class MRD(model):
|
||||||
:type likelihood_list: [GPy.likelihood] | [Y1..Yy]
|
:type likelihood_list: [GPy.likelihood] | [Y1..Yy]
|
||||||
:param names: names for different gplvm models
|
:param names: names for different gplvm models
|
||||||
:type names: [str]
|
:type names: [str]
|
||||||
:param Q: latent dimensionality (will raise
|
:param Q: latent dimensionality (will raise
|
||||||
:type Q: int
|
:type Q: int
|
||||||
:param initx: initialisation method for the latent space
|
:param initx: initialisation method for the latent space
|
||||||
:type initx: 'PCA'|'random'
|
:type initx: 'PCA'|'random'
|
||||||
|
|
@ -33,8 +33,11 @@ class MRD(model):
|
||||||
Initial latent space
|
Initial latent space
|
||||||
:param X_variance:
|
:param X_variance:
|
||||||
Initial latent space variance
|
Initial latent space variance
|
||||||
:param init: [PCA|random]
|
:param init: [cooncat|single|random]
|
||||||
initialization method to use
|
initialization method to use:
|
||||||
|
*concat: PCA on concatenated outputs
|
||||||
|
*single: PCA on each output
|
||||||
|
*random: random
|
||||||
:param M:
|
:param M:
|
||||||
number of inducing inputs to use
|
number of inducing inputs to use
|
||||||
:param Z:
|
:param Z:
|
||||||
|
|
@ -77,6 +80,7 @@ class MRD(model):
|
||||||
self.MQ = self.M * self.Q
|
self.MQ = self.M * self.Q
|
||||||
|
|
||||||
model.__init__(self) # @UndefinedVariable
|
model.__init__(self) # @UndefinedVariable
|
||||||
|
self._set_params(self._get_params())
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def X(self):
|
def X(self):
|
||||||
|
|
@ -153,7 +157,7 @@ class MRD(model):
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
"""
|
"""
|
||||||
return parameter list containing private and shared parameters as follows:
|
return parameter list containing private and shared parameters as follows:
|
||||||
|
|
||||||
=================================================================
|
=================================================================
|
||||||
| mu | S | Z || theta1 | theta2 | .. | thetaN |
|
| mu | S | Z || theta1 | theta2 | .. | thetaN |
|
||||||
=================================================================
|
=================================================================
|
||||||
|
|
|
||||||
|
|
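For orientation, constructing an MRD model roughly as mrd_simulation does in the examples above; this is a sketch with random stand-in data, the kernel and keyword choices mirror the demo, everything else is illustrative.

    import numpy as np
    from GPy import kern
    from GPy.models import mrd

    # three "views" of the same 100 observations, with different output dimensions
    N, Q, M = 100, 7, 3
    Ylist = [np.random.randn(N, D) for D in (5, 8, 12)]

    k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
    m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="concat", initz='permute')
    m.ensure_default_constraints()
    m.optimize('scg', messages=1, max_f_eval=100)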
@@ -23,9 +23,9 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
     :type init: 'PCA'|'random'

     """
-    def __init__(self, Y, Q, init='PCA', **kwargs):
+    def __init__(self, Y, Q, kernel=None, init='PCA', M=10):
         X = self.initialise_latent(init, Q, Y)
-        sparse_GP_regression.__init__(self, X, Y, **kwargs)
+        sparse_GP_regression.__init__(self, X, Y, kernel=kernel, M=M)

     def _get_param_names(self):
         return (sum([['X_%i_%i'%(n,q) for q in range(self.Q)] for n in range(self.N)],[])
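For orientation, a hedged usage sketch of the new sparse_GPLVM signature shown above. The data shapes are made up, and the assumption that the class is reachable as GPy.models.sparse_GPLVM and that optimize() is available is mine, not part of the diff.

import numpy as np
import GPy

Y = np.random.randn(50, 8)                       # 50 observations, 8 output dimensions
kernel = GPy.kern.rbf(2) + GPy.kern.white(2)     # kernel over a 2-d latent space
m = GPy.models.sparse_GPLVM(Y, 2, kernel=kernel, init='PCA', M=10)
m.ensure_default_constraints()
m.optimize()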
@@ -3,7 +3,7 @@


 import numpy as np
-from sparse_GP import sparse_GP
+from ..core import sparse_GP
 from .. import likelihoods
 from .. import kern
 from ..likelihoods import likelihood
@@ -43,4 +43,5 @@ class sparse_GP_regression(sparse_GP):
         #likelihood defaults to Gaussian
         likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)

-        sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X)
+        sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X)
+        self._set_params(self._get_params())
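A minimal sketch of using sparse_GP_regression after this change. I am assuming the constructor accepts kernel and M keywords (M appears in the sparse_GPLVM call above); the data here is placeholder.

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (40, 1))
Y = np.sin(X) + np.random.randn(40, 1) * 0.05
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
m = GPy.models.sparse_GP_regression(X, Y, kernel=kernel, M=10)   # 10 inducing inputs
m.ensure_default_constraints()
print(m.checkgrad())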
@@ -9,7 +9,7 @@ from ..util.linalg import pdinv
 from ..util.plot import gpplot
 from ..util.warping_functions import *
 from GP_regression import GP_regression
-from GP import GP
+from ..core import GP
 from .. import likelihoods
 from .. import kern

@@ -30,6 +30,7 @@ class warpedGP(GP):
         likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)

         GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
+        self._set_params(self._get_params())

     def _scale_data(self, Y):
         self._Ymax = Y.max()
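Several constructors in this commit gain the same trailing self._set_params(self._get_params()) call. A minimal, self-contained sketch of the assumed intent: round-tripping the current parameter vector once so that whatever _set_params derives from the parameters is initialised before the model is used. The stand-in base class below is an assumption for illustration only.

class _Base(object):
    """Stand-in for the real GPy model base class (assumption for this sketch)."""
    def __init__(self):
        self._params = [1.0, 1.0]
    def _get_params(self):
        return list(self._params)
    def _set_params(self, p):
        self._params = list(p)   # the real method also recomputes cached quantities

class SomeModel(_Base):
    def __init__(self):
        _Base.__init__(self)
        # Round-trip the parameters once so all derived state exists up front.
        self._set_params(self._get_params())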
@@ -1,61 +0,0 @@
-The predict method for GP_regression returns a covariance matrix, which is a bad idea as this takes a lot to compute; it's also confusing for first-time users. It should only be returned if the user explicitly requests it.
-FIXED
-
-When computing kernel.K for kernels like rbf, you can't call rbf.K(X); you have to call rbf.K(X, X).
-FIXED
-
-Change Youter to YYT (Youter doesn't mean anything for matrices).
-FIXED
-
-Change get_param and set_param to get_params and set_params.
-FIXED
-
-Fails in weird ways if you pass an integer instead of a double as the input to the kernel.
-FIXED
-
-The Matern kernels (at least the 52) still work in the ARD manner, which means they won't run for very large input dimensions. They need to be fixed to match the RBF.
-FIXED
-
-Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. The commenting in the covariance matrices needs to be improved: it's not clear to a user what all the psi parts are for. Maybe we need a cut-down and simplified example to help with this (perhaps a cut-down version of the RBF?), and then we should provide a simple list of what you need to do to get a new kernel going.
-TODO, a priority for this release.
-
-Missing kernels: polynomial, rational quadratic.
-TODO, should be straightforward when the above is fixed.
-
-Need an implementation of scaled conjugate gradients for the optimizers.
-UPSTREAM: scipy is tidying up the optimize module; let's wait for their next release.
-
-Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations).
-As above.
-
-Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it and post a separate Python implementation of his algorithm.
-Any word from Carl yet?
-
-Get parameters constrained by default inside model creation.
-Well, we have ensure_default_constraints. There are some technical difficulties in doing it inside model creation, so perhaps this is something for a later release.
-
-Bug when running classification.crescent_data().
-TODO.
-
-Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
-Upstream: waiting for the new scipy, where the optimisers have been unified. Obviously it'd be much better to be able to specify a unified set of args.
-
-Tolerances for optimizers: do we need to introduce some standardization? At the moment, does each have its own defaults?
-Upstream, as above.
-
-A dictionary for parameter storage? So we can go through names easily?
-Wontfix. Dictionaries bring up all kinds of problems since they're not ordered; it's easy enough to do:
-for val, name in zip(m._get_params(), m._get_param_names()): foobar
-
-A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
-TODO, agree this would be helpful.
-
-Diagonal noise covariance function.
-TODO: this is now straightforward using the likelihood framework, or as a kern. NF also requires a similar kind of kern function (a fixed-form kernel).
-
-Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, e.g. define data variance to be white noise plus RBF variance and optimize over that and the signal-to-noise ratio ... for example, constrain the sum of variances to equal the known variance of the data.
-
-Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and the length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model, we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
-Excellent proposal, but lots of work: suggest leaving for the next release?
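The 'foobar' one-liner in the parameter-dictionary item above is easy to make concrete. A small sketch, assuming GP_regression builds a default kernel when none is given; any constructed GPy model would do in place of m.

import numpy as np
import GPy

X = np.random.rand(10, 1)
Y = np.sin(X)
m = GPy.models.GP_regression(X, Y)
# Walk parameter names and values in order, without needing a dictionary.
for name, val in zip(m._get_param_names(), m._get_params()):
    print('%s = %s' % (name, val))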
|
|
@ -8,7 +8,7 @@ import GPy
|
||||||
class KernelTests(unittest.TestCase):
|
class KernelTests(unittest.TestCase):
|
||||||
def test_kerneltie(self):
|
def test_kerneltie(self):
|
||||||
K = GPy.kern.rbf(5, ARD=True)
|
K = GPy.kern.rbf(5, ARD=True)
|
||||||
K.tie_params('[01]')
|
K.tie_params('.*[01]')
|
||||||
K.constrain_fixed('2')
|
K.constrain_fixed('2')
|
||||||
X = np.random.rand(5,5)
|
X = np.random.rand(5,5)
|
||||||
Y = np.ones((5,1))
|
Y = np.ones((5,1))
|
||||||
|
|
|
||||||
|
|
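As the test change above suggests, tie_params and constrain_fixed appear to match parameter names against regular expressions, which is why the pattern gains a leading '.*'. A hedged sketch; the comments describe the assumed matching behaviour, not documented semantics.

import GPy

K = GPy.kern.rbf(5, ARD=True)
K.tie_params('.*[01]')     # presumably ties the parameters whose names end in 0 or 1
K.constrain_fixed('2')     # presumably fixes the parameters whose names contain a 2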
@@ -22,7 +22,7 @@ class GradientTests(unittest.TestCase):
         self.X2D = np.random.uniform(-3.,3.,(40,2))
         self.Y2D = np.sin(self.X2D[:,0:1]) * np.sin(self.X2D[:,1:2])+np.random.randn(40,1)*0.05

-    def check_model_with_white(self, kern, model_type='GP_regression', dimension=1, constraint=''):
+    def check_model_with_white(self, kern, model_type='GP_regression', dimension=1):
         #Get the correct gradients
         if dimension == 1:
             X = self.X1D
@@ -37,7 +37,7 @@ class GradientTests(unittest.TestCase):
         noise = GPy.kern.white(dimension)
         kern = kern + noise
         m = model_fit(X, Y, kernel=kern)
-        m.constrain_positive(constraint)
+        m.ensure_default_constraints()
         m.randomize()
         # constrain all parameters to be positive
         self.assertTrue(m.checkgrad())
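The gradient tests now rely on ensure_default_constraints() instead of passing an explicit positivity regex. A minimal sketch of the pattern the tests follow; the data and kernel here are placeholders of my own.

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
kern = GPy.kern.rbf(1) + GPy.kern.white(1)
m = GPy.models.GP_regression(X, Y, kernel=kern)
m.ensure_default_constraints()   # takes the place of an explicit constrain_positive(...) call
m.randomize()
print(m.checkgrad())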
@@ -135,12 +135,12 @@ class GradientTests(unittest.TestCase):
     def test_sparse_GP_regression_rbf_white_kern_1d(self):
         ''' Testing the sparse GP regression with rbf kernel with white kernel on 1d data '''
         rbf = GPy.kern.rbf(1)
-        self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=1, constraint='(variance|lengthscale|precision)')
+        self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=1)

     def test_sparse_GP_regression_rbf_white_kern_2D(self):
         ''' Testing the sparse GP regression with rbf and white kernel on 2d data '''
         rbf = GPy.kern.rbf(2)
-        self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=2, constraint='(variance|lengthscale|precision)')
+        self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=2)

     def test_GPLVM_rbf_bias_white_kern_2D(self):
         """ Testing GPLVM with rbf + bias and white kernel """
@@ -150,7 +150,7 @@ class GradientTests(unittest.TestCase):
         K = k.K(X)
         Y = np.random.multivariate_normal(np.zeros(N),K,D).T
         m = GPy.models.GPLVM(Y, Q, kernel = k)
-        m.constrain_positive('(rbf|bias|white)')
+        m.ensure_default_constraints()
         self.assertTrue(m.checkgrad())

     def test_GPLVM_rbf_linear_white_kern_2D(self):
@@ -161,7 +161,7 @@ class GradientTests(unittest.TestCase):
         K = k.K(X)
         Y = np.random.multivariate_normal(np.zeros(N),K,D).T
         m = GPy.models.GPLVM(Y, Q, init = 'PCA', kernel = k)
-        m.constrain_positive('(linear|bias|white)')
+        m.ensure_default_constraints()
         self.assertTrue(m.checkgrad())

     def test_GP_EP_probit(self):
@@ -171,7 +171,7 @@ class GradientTests(unittest.TestCase):
         kernel = GPy.kern.rbf(1)
         distribution = GPy.likelihoods.likelihood_functions.probit()
         likelihood = GPy.likelihoods.EP(Y, distribution)
-        m = GPy.models.GP(X, likelihood, kernel)
+        m = GPy.core.GP(X, likelihood, kernel)
         m.ensure_default_constraints()
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
@@ -185,7 +185,7 @@ class GradientTests(unittest.TestCase):
         kernel = GPy.kern.rbf(1)
         distribution = GPy.likelihoods.likelihood_functions.probit()
         likelihood = GPy.likelihoods.EP(Y, distribution)
-        m = GPy.models.sparse_GP(X, likelihood, kernel, Z)
+        m = GPy.core.sparse_GP(X, likelihood, kernel, Z)
         m.ensure_default_constraints()
         m.update_likelihood_approximation()
         self.assertTrue(m.checkgrad())
@@ -1,4 +1,4 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# #Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
4 MANIFEST.in Normal file

@@ -0,0 +1,4 @@
+include *.txt
+recursive-include docs *.txt
+include *.md
+recursive-include docs *.md
6 setup.py

@@ -23,14 +23,12 @@ setup(name = 'GPy',
       package_data = {'GPy': ['GPy/examples']},
       py_modules = ['GPy.__init__'],
       long_description=read('README.md'),
-      #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
-      #                         sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
       install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1', 'nose'],
       extras_require = {
         'docs':['Sphinx', 'ipython'],
       },
-      #setup_requires=['sphinx'],
-      #cmdclass = {'build_sphinx': BuildDoc},
       classifiers=[
         "License :: OSI Approved :: BSD License"],
+      #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
+      #                         sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
       )