Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Alan Saul 2015-04-23 17:00:25 +01:00
commit ac4972ff99
6 changed files with 350 additions and 33 deletions

View file

@ -728,6 +728,254 @@ class DGPLVM(Prior):
return 'DGPLVM_prior_Raq' return 'DGPLVM_prior_Raq'
# ******************************************
from parameterized import Parameterized
from .. import Param
class DGPLVM_Lamda(Prior, Parameterized):
    """
    Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.

    The latent points are rescaled column-wise by the learnable vector
    ``lamda`` before the between-class (``Sb``) and within-class (``Sw``)
    scatter matrices are computed.  The log prior is
    ``-trace(inv(Sb + 0.1 * I).dot(Sw)) / sigma2``.

    :param sigma2: constant
    :param lbl: label array of shape (datanum, classnum); the class of a row
        is the index of its first entry equal to 1
    :param x_shape: shape ``(datanum, dim)`` the latent vector is reshaped to
    :param lamda: passed through ``np.diag`` to build the ``lamda`` parameter
        (NOTE(review): presumably a square matrix whose diagonal holds the
        per-dimension scales -- confirm against callers)
    :param name: name of this parameterized object

    .. Note:: DGPLVM for Classification paper implementation
    """
    domain = _REAL

    def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
        super(DGPLVM_Lamda, self).__init__(name=name)
        self.sigma2 = sigma2
        self.lbl = lbl
        self.classnum = lbl.shape[1]
        self.datanum = lbl.shape[0]
        self.x_shape = x_shape
        self.dim = x_shape[1]
        # The scaling vector is a proper parameter so that it gets optimised.
        self.lamda = Param('lamda', np.diag(lamda))
        self.link_parameter(self.lamda)

    def get_class_label(self, y):
        """
        Return the index of the first entry of ``y`` equal to 1, or -1 when
        there is none.

        NOTE(review): the other helpers index arrays with this label, so every
        row of ``lbl`` is presumably expected to contain a 1 -- confirm.
        """
        for idx, v in enumerate(y):
            if v == 1:
                return idx
        return -1

    def compute_cls(self, x):
        """
        Assign each data point to its class.

        :param x: latent points, one row per data point
        :returns: dict mapping class label -> list of the rows of ``x`` in that class
        """
        cls = {}
        for j in range(self.datanum):
            cls.setdefault(self.get_class_label(self.lbl[j]), []).append(x[j])
        return cls

    def compute_Mi(self, cls):
        """
        Compute the mean of each class (taken along every dimension).

        :param cls: dict as returned by :meth:`compute_cls`
        :returns: array of shape (classnum, dim); rows of absent classes stay zero
        """
        M_i = np.zeros((self.classnum, self.dim))
        for i in cls:
            M_i[i] = np.mean(cls[i], axis=0)
        return M_i

    def compute_indices(self, x):
        """
        Group the data points by class, keeping their row indices.

        :param x: latent points, one row per data point
        :returns: dict mapping class label -> list of ``(row index, row)`` tuples
        """
        data_idx = {}
        for j in range(self.datanum):
            label = self.get_class_label(self.lbl[j])
            data_idx.setdefault(label, []).append((j, x[j]))
        return data_idx

    def compute_listIndices(self, data_idx):
        """
        Collect the row indices belonging to each class.

        The result is aligned with the class labels (entry ``i`` holds the
        indices of class ``i``), because the callers look it up by label.
        Building it in dict-iteration order, as was done before, only matched
        the labels when the dict happened to iterate in sorted order and no
        class was empty.

        :param data_idx: dict as returned by :meth:`compute_indices`
        :returns: list of length classnum; entry ``i`` is the list of row
            indices of class ``i`` (empty when the class has no members)
        """
        return [[j for j, _ in data_idx.get(label, [])]
                for label in range(self.classnum)]

    def compute_Sb(self, cls, M_i, M_0):
        """
        Compute the between-class scatter matrix.

        :param cls: dict as returned by :meth:`compute_cls`
        :param M_i: class means, shape (classnum, dim)
        :param M_0: overall mean, shape (dim,)
        :returns: array of shape (dim, dim)
        """
        Sb = np.zeros((self.dim, self.dim))
        for i in cls:
            B = (M_i[i] - M_0).reshape(self.dim, 1)
            Sb += (float(len(cls[i])) / self.datanum) * B.dot(B.T)
        return Sb

    def compute_Sw(self, cls, M_i):
        """
        Compute the within-class scatter matrix.

        :param cls: dict as returned by :meth:`compute_cls`
        :param M_i: class means, shape (classnum, dim)
        :returns: array of shape (dim, dim)
        """
        Sw = np.zeros((self.dim, self.dim))
        for i in cls:
            N_i = float(len(cls[i]))
            W_WT = np.zeros((self.dim, self.dim))
            for xk in cls[i]:
                W = xk - M_i[i]
                W_WT += np.outer(W, W)
            Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
        return Sw

    def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
        """
        Compute, for every data point k, ``sum_i (N_i / N) * beta_ik * B_i``
        (the term used for the derivative of Sb).

        ``B_i = M_i - M_0`` and ``beta_ik`` is ``1/N_i - 1/N`` when point k
        belongs to class i and ``-1/N`` otherwise.

        :param data_idx: dict as returned by :meth:`compute_indices`
        :param M_i: class means, shape (classnum, dim)
        :param M_0: overall mean, shape (dim,)
        :param lst_idx_all: per-class row indices from :meth:`compute_listIndices`
        :returns: array of shape (dim, datanum)
        """
        B_i = np.zeros((self.classnum, self.dim))
        for i in data_idx:
            B_i[i] = M_i[i] - M_0

        Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
        for i in data_idx:
            N_i = float(len(data_idx[i]))
            # beta for all points at once: -1/N everywhere, plus 1/N_i for
            # the members of class i.
            beta = np.full(self.datanum, -1.0 / self.datanum)
            beta[lst_idx_all[i]] += 1.0 / N_i
            Sig_beta_B_i_all += (N_i / self.datanum) * np.outer(beta, B_i[i])
        return Sig_beta_B_i_all.T

    def compute_wj(self, data_idx, M_i):
        """
        Compute ``W_j = x_j - M_i`` for every data point j, where i is the
        class of j, so that all W_j are available at once.

        :param data_idx: dict as returned by :meth:`compute_indices`
        :param M_i: class means, shape (classnum, dim)
        :returns: array of shape (datanum, dim)
        """
        W_i = np.zeros((self.datanum, self.dim))
        for i in data_idx:
            for j, xj in data_idx[i]:
                W_i[j] = xj - M_i[i]
        return W_i

    def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
        """
        Compute, for every data point k of class i, ``sum_j alpha_kj * W_j``
        over the members j of class i, scaled by ``1/N`` (the term used for
        the derivative of Sw).

        ``alpha_kj`` is ``1 - 1/N_i`` when ``k == j`` and ``-1/N_i``
        otherwise, so the sum equals ``W_k - (1/N_i) * sum_j W_j``.

        :param data_idx: dict as returned by :meth:`compute_indices`
        :param lst_idx_all: per-class row indices from :meth:`compute_listIndices`
        :param W_i: array from :meth:`compute_wj`, shape (datanum, dim)
        :returns: array of shape (dim, datanum)
        """
        Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
        for i in data_idx:
            N_i = float(len(data_idx[i]))
            members = lst_idx_all[i]
            W_class = W_i[members]
            Sig_alpha_W_i[members] = W_class - W_class.sum(axis=0) / N_i
        return (1. / self.datanum) * Sig_alpha_W_i.T

    def _class_statistics(self, x):
        """
        Shared set-up of :meth:`lnpdf` and :meth:`lnpdf_grad`.

        Reshapes ``x`` to ``x_shape``, scales its columns by ``lamda`` and
        computes the class statistics of the scaled points.

        :returns: tuple ``(x_raw, x_scaled, M_0, M_i, Sb_inv_N, Sw)`` where
            ``Sb_inv_N`` is the first return value of ``pdinv`` applied to
            ``Sb + 0.1 * I``
        """
        x_raw = x.reshape(self.x_shape)
        x_scaled = x_raw.dot(np.diagflat(self.lamda))
        cls = self.compute_cls(x_scaled)
        M_0 = np.mean(x_scaled, axis=0)
        M_i = self.compute_Mi(cls)
        Sb = self.compute_Sb(cls, M_i, M_0)
        Sw = self.compute_Sw(cls, M_i)
        # 0.1 * I is added to Sb before it is inverted.
        Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0]) * 0.1)[0]
        return x_raw, x_scaled, M_0, M_i, Sb_inv_N, Sw

    def lnpdf(self, x):
        """
        Log of the prior evaluated at the latent points ``x``.

        :param x: latent points, reshaped internally to ``x_shape``
        :returns: ``-trace(inv(Sb + 0.1 * I).dot(Sw)) / sigma2``
        """
        _, _, _, _, Sb_inv_N, Sw = self._class_statistics(x)
        # -1.0 (not -1) so that an integer sigma2 cannot trigger Python 2
        # floor division.
        return (-1.0 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))

    def lnpdf_grad(self, x):
        """
        Derivative of the log prior with respect to the latent points.

        As a side effect the gradient with respect to ``lamda`` is stored in
        ``self.lamda.gradient``.

        :param x: latent points, reshaped internally to ``x_shape``
        :returns: array of shape (datanum, dim)
        """
        x_raw, x_scaled, M_0, M_i, Sb_inv_N, Sw = self._class_statistics(x)
        data_idx = self.compute_indices(x_scaled)
        lst_idx_all = self.compute_listIndices(data_idx)
        Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
        W_i = self.compute_wj(data_idx, M_i)
        Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)

        Sb_inv_N_trans = Sb_inv_N.T
        # DJ/Dx' for the scaled points x' = x.dot(diag(lamda)), shape (dim, datanum)
        DJ_Dxk = 2 * (
            (-Sb_inv_N_trans).dot(Sw.T).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all)
            + Sb_inv_N_trans.dot(Sig_alpha_W_i))
        DPx_Dx = (-1.0 / self.sigma2) * DJ_Dxk

        # Chain rule for lamda: d x'[n, q] / d lamda[q] = x[n, q], i.e. the
        # UNSCALED point.  Using the scaled points here would multiply the
        # gradient by lamda[q] once too often.
        self.lamda.gradient = np.sum(DPx_Dx.T * x_raw, axis=0)

        # Chain rule for x: d x'[n, q] / d x[n, q] = lamda[q].  The result is
        # transposed because GPy expects the same shape as x (denominator layout).
        return np.diagflat(self.lamda).dot(DPx_Dx).T

    def rvs(self, n):
        """
        Return ``n`` uniform random numbers.

        NOTE: flagged by the original author as a WRONG implementation; these
        are not samples from this prior.
        """
        return np.random.rand(n)

    def __str__(self):
        return 'DGPLVM_prior_Raq_Lamda'
# ******************************************
class DGPLVM_T(Prior): class DGPLVM_T(Prior):
""" """
@ -780,11 +1028,12 @@ class DGPLVM_T(Prior):
return cls return cls
# This function computes mean of each class. The mean is calculated through each dimension # This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls, vec): def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim)) M_i = np.zeros((self.classnum, self.dim))
for i in cls: for i in cls:
# Mean of each class # Mean of each class
class_i = np.multiply(cls[i],vec) # class_i = np.multiply(cls[i],vec)
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0) M_i[i] = np.mean(class_i, axis=0)
return M_i return M_i
@ -890,9 +1139,12 @@ class DGPLVM_T(Prior):
# This function calculates log of our prior # This function calculates log of our prior
def lnpdf(self, x): def lnpdf(self, x):
x = x.reshape(self.x_shape) x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x) cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0) M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls, self.vec) M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0) Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i) Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
@ -905,9 +1157,12 @@ class DGPLVM_T(Prior):
# This function calculates derivative of the log of prior function # This function calculates derivative of the log of prior function
def lnpdf_grad(self, x): def lnpdf_grad(self, x):
x = x.reshape(self.x_shape) x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x) cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0) M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls, self.vec) M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0) Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i) Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x) data_idx = self.compute_indices(x)

View file

@ -10,6 +10,7 @@ from .parameterization.variational import VariationalPosterior, NormalPosterior
from ..util.linalg import mdot from ..util.linalg import mdot
import logging import logging
import itertools
logger = logging.getLogger("sparse gp") logger = logging.getLogger("sparse gp")
class SparseGP(GP): class SparseGP(GP):
@ -135,7 +136,13 @@ class SparseGP(GP):
var = var var = var
else: else:
Kxx = kern.Kdiag(Xnew) Kxx = kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0)
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function #add in the mean function
if self.mean_function is not None: if self.mean_function is not None:
mu += self.mean_function.f(Xnew) mu += self.mean_function.f(Xnew)

View file

@ -5,33 +5,59 @@ from ...core.parameterization.param import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np
from ...util.caching import Cache_this from ...util.caching import Cache_this
from ...util.linalg import tdot from ...util.linalg import tdot, mdot
class BasisFuncKernel(Kern): class BasisFuncKernel(Kern):
def __init__(self, input_dim, variance=1., active_dims=None, name='basis func kernel'): def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
""" """
Abstract superclass for kernels with explicit basis functions for use in GPy. Abstract superclass for kernels with explicit basis functions for use in GPy.
This class does NOT automatically add an offset to the design matrix phi! This class does NOT automatically add an offset to the design matrix phi!
""" """
super(BasisFuncKernel, self).__init__(input_dim, active_dims, name) super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
self.ARD = ARD
if self.ARD:
phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
variance = variance * np.ones(phi_test.shape[1])
else:
variance = np.array(variance)
self.variance = Param('variance', variance, Logexp()) self.variance = Param('variance', variance, Logexp())
self.link_parameter(self.variance) self.link_parameter(self.variance)
def parameters_changed(self):
self.alpha = np.sqrt(self.variance)
self.beta = 1./self.variance
@Cache_this(limit=3, ignore_args=())
def phi(self, X): def phi(self, X):
raise NotImplementedError('Overwrite this phi function, which maps the input X into the higher dimensional space and forms the design matrix Phi') return self._phi(X)
def _phi(self, X):
raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
def K(self, X, X2=None): def K(self, X, X2=None):
return self.variance * self._K(X, X2) return self._K(X, X2)
def Kdiag(self, X, X2=None): def Kdiag(self, X, X2=None):
return self.variance * np.diag(self._K(X, X2)) return np.diag(self._K(X, X2))
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) if self.ARD:
phi1 = self.phi(X)
if X2 is None or X is X2:
self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1)
else:
phi2 = self.phi(X2)
self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
else:
self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self._K(X)) if self.ARD:
phi1 = self.phi(X)
self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
else:
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
def concatenate_offset(self, X): def concatenate_offset(self, X):
return np.c_[np.ones((X.shape[0], 1)), X] return np.c_[np.ones((X.shape[0], 1)), X]
@ -52,19 +78,19 @@ class BasisFuncKernel(Kern):
posterior = self._highest_parent_.posterior posterior = self._highest_parent_.posterior
except NameError: except NameError:
raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference") raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
phi = self.phi(X) phi_alpha = self.phi(X) * self.variance
return self.variance * phi.T.dot(posterior.woodbury_vector), self.variance * (1 - self.variance * phi.T.dot(posterior.woodbury_inv.dot(phi))) return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _K(self, X, X2): def _K(self, X, X2):
if X2 is None or X is X2: if X2 is None or X is X2:
phi = self.phi(X) phi = self.phi(X) * self.alpha
if phi.ndim != 2: if phi.ndim != 2:
phi = phi[:, None] phi = phi[:, None]
return tdot(phi) return tdot(phi)
else: else:
phi1 = self.phi(X) phi1 = self.phi(X) * self.alpha
phi2 = self.phi(X2) phi2 = self.phi(X2) * self.alpha
if phi1.ndim != 2: if phi1.ndim != 2:
phi1 = phi1[:, None] phi1 = phi1[:, None]
phi2 = phi2[:, None] phi2 = phi2[:, None]
@ -72,30 +98,41 @@ class BasisFuncKernel(Kern):
class LinearSlopeBasisFuncKernel(BasisFuncKernel): class LinearSlopeBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, name='linear_segment'): def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name) """
A linear segment transformation. The segments start at start, \
are then linear to stop and constant again. The segments are
normalized, so that they have exactly as much mass above
as below the origin.
Start and stop can be tuples or lists of starts and stops.
Behaviour of start stop is as np.where(X<start) would do.
"""
self.start = np.array(start) self.start = np.array(start)
self.stop = np.array(stop) self.stop = np.array(stop)
super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def phi(self, X): def _phi(self, X):
phi = np.where(X < self.start, self.start, X) phi = np.where(X < self.start, self.start, X)
phi = np.where(phi > self.stop, self.stop, phi) phi = np.where(phi > self.stop, self.stop, phi)
return ((phi-self.start)/(self.stop-self.start))-.5 return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
return self.concatenate_offset(phi) # ((phi-self.start)/(self.stop-self.start))-.5
class ChangePointBasisFuncKernel(BasisFuncKernel): class ChangePointBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, changepoint, variance=1., active_dims=None, name='changepoint'): def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, name)
self.changepoint = changepoint self.changepoint = changepoint
super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def phi(self, X): def _phi(self, X):
return self.concatenate_offset(np.where((X < self.changepoint), -1, 1)) return np.where((X < self.changepoint), -1, 1)
class DomainKernel(LinearSlopeBasisFuncKernel): class DomainKernel(LinearSlopeBasisFuncKernel):
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def phi(self, X): def _phi(self, X):
phi = np.where((X>self.start)*(X<self.stop), 1., 0.) phi = np.where((X>self.start)*(X<self.stop), 1, 0)
return phi#((phi-self.start)/(self.stop-self.start))-.5 return phi#((phi-self.start)/(self.stop-self.start))-.5
return self.concatenate_offset(phi) # ((phi-self.start)/(self.stop-self.start))-.5

View file

@ -68,8 +68,6 @@ class Periodic(Kern):
return np.diag(self.K(X)) return np.diag(self.K(X))
class PeriodicExponential(Periodic): class PeriodicExponential(Periodic):
""" """
Kernel of the periodic subspace (up to a given frequency) of a exponential Kernel of the periodic subspace (up to a given frequency) of a exponential

View file

@ -21,3 +21,4 @@ from .gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
from .gp_var_gauss import GPVariationalGaussianApproximation from .gp_var_gauss import GPVariationalGaussianApproximation
from .one_vs_all_classification import OneVsAllClassification from .one_vs_all_classification import OneVsAllClassification
from .one_vs_all_sparse_classification import OneVsAllSparseClassification from .one_vs_all_sparse_classification import OneVsAllSparseClassification
from .dpgplvm import DPBayesianGPLVM

19
GPy/models/dpgplvm.py Normal file
View file

@ -0,0 +1,19 @@
# Copyright (c) 2015 the GPy Authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from .. import kern
from bayesian_gplvm import BayesianGPLVM
from ..core.parameterization.variational import NormalPosterior, NormalPrior
class DPBayesianGPLVM(BayesianGPLVM):
    """
    Bayesian Gaussian Process Latent Variable Model with Discriminative prior

    :param X_prior: prior object that is set on the latent means
        (``self.X.mean``) and linked to the model as a parameter
    :param name: name of the model; forwarded to BayesianGPLVM

    All other arguments are forwarded unchanged to BayesianGPLVM.
    """
    def __init__(self, Y, input_dim, X_prior, X=None, X_variance=None, init='PCA', num_inducing=10,
                 Z=None, kernel=None, inference_method=None, likelihood=None,
                 name='dp bayesian gplvm', mpi_comm=None, normalizer=None,
                 missing_data=False, stochastic=False, batchsize=1):
        # `name` used to be accepted but silently replaced by a hard-coded
        # 'dp bayesian gplvm'; that string is now the default, so callers that
        # do not pass a name get the same model name as before.
        super(DPBayesianGPLVM, self).__init__(
            Y=Y, input_dim=input_dim, X=X, X_variance=X_variance, init=init,
            num_inducing=num_inducing, Z=Z, kernel=kernel,
            inference_method=inference_method, likelihood=likelihood,
            mpi_comm=mpi_comm, normalizer=normalizer,
            missing_data=missing_data, stochastic=stochastic,
            batchsize=batchsize, name=name)
        self.X.mean.set_prior(X_prior)
        # Link the prior so that its own parameters become part of the model.
        self.link_parameter(X_prior)