mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-20 15:38:08 +02:00
Merge branch 'params' of github.com:SheffieldML/GPy into params
This commit is contained in:
commit
81040f4566
19 changed files with 293 additions and 139 deletions
|
|
@ -24,7 +24,6 @@ class Model(Parameterized):
|
|||
|
||||
def log_likelihood(self):
|
||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||
|
||||
def _log_likelihood_gradients(self):
|
||||
return self.gradient
|
||||
|
||||
|
|
@ -148,7 +147,60 @@ class Model(Parameterized):
|
|||
"""
|
||||
return self.kern.input_sensitivity()
|
||||
|
||||
def objective_function(self, x):
|
||||
def objective_function(self):
|
||||
"""
|
||||
The objective function for the given algorithm.
|
||||
|
||||
This function is the true objective, which wants to be minimized.
|
||||
Note that all parameters are already set and in place, so you just need
|
||||
to return the objective function here.
|
||||
|
||||
For probabilistic models this is the negative log_likelihood
|
||||
(including the MAP prior), so we return it here. If your model is not
|
||||
probabilistic, just return your objective here!
|
||||
"""
|
||||
return -float(self.log_likelihood()) - self.log_prior()
|
||||
|
||||
def objective_function_gradients(self):
|
||||
"""
|
||||
The gradients for the objective function for the given algorithm.
|
||||
|
||||
You can find the gradient for the parameters in self.gradient at all times.
|
||||
This is the place, where gradients get stored for parameters.
|
||||
|
||||
This function is the true objective, which wants to be minimized.
|
||||
Note that all parameters are already set and in place, so you just need
|
||||
to return the gradient here.
|
||||
|
||||
For probabilistic models this is the gradient of the negative log_likelihood
|
||||
(including the MAP prior), so we return it here. If your model is not
|
||||
probabilistic, just return your gradient here!
|
||||
"""
|
||||
return -(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
|
||||
def _grads(self, x):
|
||||
"""
|
||||
Gets the gradients from the likelihood and the priors.
|
||||
|
||||
Failures are handled robustly. The algorithm will try several times to
|
||||
return the gradients, and will raise the original exception if
|
||||
the objective cannot be computed.
|
||||
|
||||
:param x: the parameters of the model.
|
||||
:type x: np.array
|
||||
"""
|
||||
try:
|
||||
self._set_params_transformed(x)
|
||||
obj_grads = self._transform_gradients(self.objective_function_gradients())
|
||||
self._fail_count = 0
|
||||
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||
if self._fail_count >= self._allowed_failures:
|
||||
raise
|
||||
self._fail_count += 1
|
||||
obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
|
||||
return obj_grads
|
||||
|
||||
def _objective(self, x):
|
||||
"""
|
||||
The objective function passed to the optimizer. It combines
|
||||
the likelihood and the priors.
|
||||
|
|
@ -162,55 +214,26 @@ class Model(Parameterized):
|
|||
"""
|
||||
try:
|
||||
self._set_params_transformed(x)
|
||||
obj = self.objective_function()
|
||||
self._fail_count = 0
|
||||
except (LinAlgError, ZeroDivisionError, ValueError) as e:
|
||||
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||
if self._fail_count >= self._allowed_failures:
|
||||
raise e
|
||||
raise
|
||||
self._fail_count += 1
|
||||
return np.inf
|
||||
return -float(self.log_likelihood()) - self.log_prior()
|
||||
return obj
|
||||
|
||||
def objective_function_gradients(self, x):
|
||||
"""
|
||||
Gets the gradients from the likelihood and the priors.
|
||||
|
||||
Failures are handled robustly. The algorithm will try several times to
|
||||
return the gradients, and will raise the original exception if
|
||||
the objective cannot be computed.
|
||||
|
||||
:param x: the parameters of the model.
|
||||
:type x: np.array
|
||||
"""
|
||||
def _objective_grads(self, x):
|
||||
try:
|
||||
self._set_params_transformed(x)
|
||||
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
|
||||
self._fail_count = 0
|
||||
except (LinAlgError, ZeroDivisionError, ValueError) as e:
|
||||
except (LinAlgError, ZeroDivisionError, ValueError):
|
||||
if self._fail_count >= self._allowed_failures:
|
||||
raise e
|
||||
self._fail_count += 1
|
||||
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
|
||||
return obj_grads
|
||||
|
||||
def objective_and_gradients(self, x):
|
||||
"""
|
||||
Compute the objective function of the model and the gradient of the model at the point given by x.
|
||||
|
||||
:param x: the point at which gradients are to be computed.
|
||||
:type x: np.array
|
||||
"""
|
||||
|
||||
try:
|
||||
self._set_params_transformed(x)
|
||||
obj_f = -float(self.log_likelihood()) - self.log_prior()
|
||||
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
|
||||
self._fail_count = 0
|
||||
except (LinAlgError, ZeroDivisionError, ValueError) as e:
|
||||
if self._fail_count >= self._allowed_failures:
|
||||
raise e
|
||||
raise
|
||||
self._fail_count += 1
|
||||
obj_f = np.inf
|
||||
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
|
||||
obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
|
||||
return obj_f, obj_grads
|
||||
|
||||
def optimize(self, optimizer=None, start=None, **kwargs):
|
||||
|
|
@ -241,7 +264,7 @@ class Model(Parameterized):
|
|||
optimizer = optimization.get_optimizer(optimizer)
|
||||
opt = optimizer(start, model=self, **kwargs)
|
||||
|
||||
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
|
||||
opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
|
||||
|
||||
self.optimization_runs.append(opt)
|
||||
|
||||
|
|
@ -292,9 +315,9 @@ class Model(Parameterized):
|
|||
dx[transformed_index] = step * np.sign(np.random.uniform(-1, 1, transformed_index.size))
|
||||
|
||||
# evaulate around the point x
|
||||
f1 = self.objective_function(x + dx)
|
||||
f2 = self.objective_function(x - dx)
|
||||
gradient = self.objective_function_gradients(x)
|
||||
f1 = self._objective(x + dx)
|
||||
f2 = self._objective(x - dx)
|
||||
gradient = self._grads(x)
|
||||
|
||||
dx = dx[transformed_index]
|
||||
gradient = gradient[transformed_index]
|
||||
|
|
@ -337,15 +360,15 @@ class Model(Parameterized):
|
|||
print "No free parameters to check"
|
||||
return
|
||||
|
||||
gradient = self.objective_function_gradients(x).copy()
|
||||
gradient = self._grads(x).copy()
|
||||
np.where(gradient == 0, 1e-312, gradient)
|
||||
ret = True
|
||||
for nind, xind in itertools.izip(param_index, transformed_index):
|
||||
xx = x.copy()
|
||||
xx[xind] += step
|
||||
f1 = self.objective_function(xx)
|
||||
f1 = self._objective(xx)
|
||||
xx[xind] -= 2.*step
|
||||
f2 = self.objective_function(xx)
|
||||
f2 = self._objective(xx)
|
||||
numerical_gradient = (f1 - f2) / (2 * step)
|
||||
if np.all(gradient[xind]==0): ratio = (f1-f2) == gradient[xind]
|
||||
else: ratio = (f1 - f2) / (2 * step * gradient[xind])
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
__updated__ = '2014-03-21'
|
||||
__updated__ = '2014-03-24'
|
||||
|
||||
import numpy as np
|
||||
from parameter_core import Observable
|
||||
|
|
@ -38,24 +38,9 @@ class ObsAr(np.ndarray, Observable):
|
|||
np.ndarray.__setstate__(self, state[0])
|
||||
Observable._setstate(self, state[1])
|
||||
|
||||
def _s_not_empty(self, s):
|
||||
# this checks whether there is something picked by this slice.
|
||||
return True
|
||||
# TODO: disarmed, for performance increase,
|
||||
if not isinstance(s, (list,tuple,np.ndarray)):
|
||||
return True
|
||||
if isinstance(s, (list,tuple)):
|
||||
return len(s)!=0
|
||||
if isinstance(s, np.ndarray):
|
||||
if s.dtype is bool:
|
||||
return np.all(s)
|
||||
else:
|
||||
return s.size != 0
|
||||
|
||||
def __setitem__(self, s, val):
|
||||
if self._s_not_empty(s):
|
||||
super(ObsAr, self).__setitem__(s, val)
|
||||
self.notify_observers()
|
||||
super(ObsAr, self).__setitem__(s, val)
|
||||
self.notify_observers()
|
||||
|
||||
def __getslice__(self, start, stop):
|
||||
return self.__getitem__(slice(start, stop))
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ class ParameterIndexOperationsView(object):
|
|||
|
||||
|
||||
def remove(self, prop, indices):
|
||||
removed = self._param_index_ops.remove(prop, indices+self._offset)
|
||||
removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset)
|
||||
if removed.size > 0:
|
||||
return removed - self._size + 1
|
||||
return removed
|
||||
|
|
|
|||
|
|
@ -282,8 +282,8 @@ class Param(OptimizationHandlable, ObsAr):
|
|||
if isinstance(slice_index, (tuple, list)):
|
||||
clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
|
||||
for i in range(self._realndim_-len(clean_curr_slice)):
|
||||
i+=len(clean_curr_slice)
|
||||
clean_curr_slice += range(self._realshape_[i])
|
||||
i+=1
|
||||
clean_curr_slice += [range(self._realshape_[i])]
|
||||
if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
|
||||
and len(set(map(len, clean_curr_slice))) <= 1):
|
||||
return numpy.fromiter(itertools.izip(*clean_curr_slice),
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ class Parameterized(Parameterizable, Pickleable):
|
|||
# Metaclass for parameters changed after init.
|
||||
# This makes sure, that parameters changed will always be called after __init__
|
||||
# **Never** call parameters_changed() yourself
|
||||
__metaclass__ = ParametersChangedMeta
|
||||
__metaclass__ = ParametersChangedMeta
|
||||
#===========================================================================
|
||||
def __init__(self, name=None, parameters=[], *a, **kw):
|
||||
super(Parameterized, self).__init__(name=name, *a, **kw)
|
||||
|
|
|
|||
|
|
@ -277,7 +277,9 @@ def bgplvm_simulation(optimize=True, verbose=1,
|
|||
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
|
||||
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
|
||||
|
||||
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
|
||||
m.likelihood.variance = .1
|
||||
|
||||
if optimize:
|
||||
print "Optimizing model:"
|
||||
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
|
||||
|
|
@ -299,15 +301,16 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
|||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||
Y = Ylist[0]
|
||||
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||
|
||||
|
||||
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
|
||||
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
|
||||
m.inference_method = VarDTCMissingData()
|
||||
m.Y[inan] = _np.nan
|
||||
m.X.variance *= .1
|
||||
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
|
||||
m.likelihood.variance = .01
|
||||
m.parameters_changed()
|
||||
m.Yreal = Y
|
||||
|
||||
|
||||
if optimize:
|
||||
print "Optimizing model:"
|
||||
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
|
||||
|
|
@ -325,11 +328,11 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
|||
|
||||
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
|
||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||
|
||||
|
||||
#Ylist = [Ylist[0]]
|
||||
k = [kern.Linear(Q, ARD=True) + kern.White(Q, 1e-4) for _ in range(len(Ylist))]
|
||||
k = [kern.Linear(Q, ARD=True) for _ in range(len(Ylist))]
|
||||
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="", initz='permute', **kw)
|
||||
|
||||
|
||||
m['.*noise'] = [Y.var()/500. for Y in Ylist]
|
||||
#for i, Y in enumerate(Ylist):
|
||||
# m['.*Y_{}.*Gaussian.*noise'.format(i)] = Y.var(1) / 500.
|
||||
|
|
|
|||
|
|
@ -179,6 +179,7 @@ class VarDTC(object):
|
|||
return post, log_marginal, grad_dict
|
||||
|
||||
class VarDTCMissingData(object):
|
||||
const_jitter = 1e-6
|
||||
def __init__(self, limit=1):
|
||||
from ...util.caching import Cacher
|
||||
self._Y = Cacher(self._subarray_computations, limit)
|
||||
|
|
@ -250,7 +251,7 @@ class VarDTCMissingData(object):
|
|||
|
||||
for y, trYYT, [v, ind] in itertools.izip(Ys, traces, self._subarray_indices):
|
||||
if het_noise: beta = beta_all[ind]
|
||||
else: beta = beta_all[0]
|
||||
else: beta = beta_all
|
||||
|
||||
VVT_factor = (beta*y)
|
||||
VVT_factor_all[v, ind].flat = VVT_factor.flat
|
||||
|
|
@ -311,7 +312,7 @@ class VarDTCMissingData(object):
|
|||
het_noise, uncertain_inputs, LB,
|
||||
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
|
||||
psi0, psi1, beta,
|
||||
data_fit, num_data, output_dim, trYYT)
|
||||
data_fit, num_data, output_dim, trYYT, Y)
|
||||
|
||||
if full_VVT_factor: woodbury_vector[:, ind] = Cpsi1Vf
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ class Linear(Kern):
|
|||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu)
|
||||
# return (self.variances*gamma*mu).sum(axis=1)
|
||||
# return (self.variances*gamma*mu).sum(axis=1)
|
||||
else:
|
||||
return self.K(variational_posterior.mean, Z) #the variance, it does nothing
|
||||
|
||||
|
|
@ -177,7 +177,7 @@ class Linear(Kern):
|
|||
|
||||
grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
|
||||
np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)
|
||||
|
||||
|
||||
return grad
|
||||
else:
|
||||
#psi1
|
||||
|
|
@ -191,15 +191,15 @@ class Linear(Kern):
|
|||
gamma = variational_posterior.binary_prob
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
mu2S = np.square(mu)+S
|
||||
mu2S = np.square(mu)+S
|
||||
_, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
|
||||
|
||||
|
||||
grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\
|
||||
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma)
|
||||
grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\
|
||||
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu)
|
||||
grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS)
|
||||
|
||||
|
||||
return grad_mu, grad_S, grad_gamma
|
||||
else:
|
||||
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
|
||||
|
|
@ -210,7 +210,7 @@ class Linear(Kern):
|
|||
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
|
||||
# psi2
|
||||
self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S)
|
||||
|
||||
|
||||
return grad_mu, grad_S
|
||||
|
||||
#--------------------------------------------------#
|
||||
|
|
|
|||
|
|
@ -58,8 +58,6 @@ class Prod(CombinationKernel):
|
|||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
target = np.zeros(X.shape)
|
||||
for k1,k2 in itertools.combinations(self.parts, 2):
|
||||
target += k1.gradients_X(dL_dKdiag*k2.Kdiag(X), X)
|
||||
target += k2.gradients_X(dL_dKdiag*k1.Kdiag(X), X)
|
||||
target += k1.gradients_X_diag(dL_dKdiag*k2.Kdiag(X), X)
|
||||
target += k2.gradients_X_diag(dL_dKdiag*k1.Kdiag(X), X)
|
||||
return target
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,10 @@ class BayesianGPLVM(SparseGP):
|
|||
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs):
|
||||
if X == None:
|
||||
from ..util.initialization import initialize_latent
|
||||
X = initialize_latent(init, input_dim, Y)
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
|
||||
self.init = init
|
||||
|
||||
if X_variance is None:
|
||||
|
|
@ -38,7 +41,7 @@ class BayesianGPLVM(SparseGP):
|
|||
assert Z.shape[1] == X.shape[1]
|
||||
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(input_dim) # + kern.white(input_dim)
|
||||
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
|
||||
|
||||
if likelihood is None:
|
||||
likelihood = Gaussian()
|
||||
|
|
@ -47,6 +50,14 @@ class BayesianGPLVM(SparseGP):
|
|||
self.variational_prior = NormalPrior()
|
||||
X = NormalPosterior(X, X_variance)
|
||||
|
||||
if inference_method is None:
|
||||
if np.any(np.isnan(Y)):
|
||||
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
|
||||
inference_method = VarDTCMissingData()
|
||||
else:
|
||||
from ..inference.latent_function_inference.var_dtc import VarDTC
|
||||
inference_method = VarDTC()
|
||||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
||||
self.add_parameter(self.X, index=0)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@
|
|||
# Copyright (c) 2013, the GPy Authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core import GP
|
||||
from .. import likelihoods
|
||||
from .. import kern
|
||||
from ..inference.latent_function_inference.expectation_propagation import EP
|
||||
|
||||
class GPClassification(GP):
|
||||
"""
|
||||
|
|
@ -27,4 +27,4 @@ class GPClassification(GP):
|
|||
|
||||
likelihood = likelihoods.Bernoulli()
|
||||
|
||||
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, name='gp_classification')
|
||||
GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), name='gp_classification')
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
import numpy as np
|
||||
import pylab as pb
|
||||
from .. import kern
|
||||
from ..util.linalg import PCA
|
||||
from ..core import GP, Param
|
||||
from ..likelihoods import Gaussian
|
||||
from .. import util
|
||||
|
|
@ -29,9 +28,11 @@ class GPLVM(GP):
|
|||
"""
|
||||
if X is None:
|
||||
from ..util.initialization import initialize_latent
|
||||
X = initialize_latent(init, input_dim, Y)
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(input_dim, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
|
||||
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=input_dim > 1) + kern.Bias(input_dim, np.exp(-2))
|
||||
|
||||
likelihood = Gaussian()
|
||||
|
||||
|
|
|
|||
|
|
@ -6,12 +6,12 @@ import itertools
|
|||
import pylab
|
||||
|
||||
from ..core import Model
|
||||
from ..util.linalg import PCA
|
||||
from ..kern import Kern
|
||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior
|
||||
from ..core.parameterization import Param, Parameterized
|
||||
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData, VarDTC
|
||||
from ..likelihoods import Gaussian
|
||||
from GPy.util.initialization import initialize_latent
|
||||
|
||||
class MRD(Model):
|
||||
"""
|
||||
|
|
@ -51,27 +51,28 @@ class MRD(Model):
|
|||
inference_method=None, likelihood=None, name='mrd', Ynames=None):
|
||||
super(MRD, self).__init__(name)
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.num_inducing = num_inducing
|
||||
|
||||
self.Ylist = Ylist
|
||||
self._in_init_ = True
|
||||
X, fracs = self._init_X(initx, Ylist)
|
||||
self.Z = Param('inducing inputs', self._init_Z(initz, X))
|
||||
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
|
||||
|
||||
# sort out the kernels
|
||||
if kernel is None:
|
||||
from ..kern import RBF
|
||||
self.kern = [RBF(input_dim, ARD=1, name='rbf'.format(i)) for i in range(len(Ylist))]
|
||||
self.kern = [RBF(input_dim, ARD=1, lengthscale=fracs[i], name='rbf'.format(i)) for i in range(len(Ylist))]
|
||||
elif isinstance(kernel, Kern):
|
||||
self.kern = [kernel.copy(name='{}'.format(kernel.name, i)) for i in range(len(Ylist))]
|
||||
else:
|
||||
assert len(kernel) == len(Ylist), "need one kernel per output"
|
||||
assert all([isinstance(k, Kern) for k in kernel]), "invalid kernel object detected!"
|
||||
self.kern = kernel
|
||||
self.input_dim = input_dim
|
||||
self.num_inducing = num_inducing
|
||||
|
||||
self.Ylist = Ylist
|
||||
self._in_init_ = True
|
||||
X = self._init_X(initx, Ylist)
|
||||
self.Z = Param('inducing inputs', self._init_Z(initz, X))
|
||||
self.num_inducing = self.Z.shape[0] # ensure M==N if M>N
|
||||
|
||||
if X_variance is None:
|
||||
X_variance = np.random.uniform(0, .2, X.shape)
|
||||
X_variance = np.random.uniform(0, .1, X.shape)
|
||||
|
||||
self.variational_prior = NormalPrior()
|
||||
self.X = NormalPosterior(X, X_variance)
|
||||
|
|
@ -108,8 +109,7 @@ class MRD(Model):
|
|||
self._log_marginal_likelihood = 0
|
||||
self.posteriors = []
|
||||
self.Z.gradient = 0.
|
||||
self.X.mean.gradient = 0.
|
||||
self.X.variance.gradient = 0.
|
||||
self.X.gradient = 0.
|
||||
|
||||
for y, k, l, i in itertools.izip(self.Ylist, self.kern, self.likelihood, self.inference_method):
|
||||
posterior, lml, grad_dict = i.inference(k, self.X, self.Z, l, y)
|
||||
|
|
@ -147,14 +147,20 @@ class MRD(Model):
|
|||
if Ylist is None:
|
||||
Ylist = self.Ylist
|
||||
if init in "PCA_concat":
|
||||
X = PCA(np.hstack(Ylist), self.input_dim)[0]
|
||||
X, fracs = initialize_latent('PCA', self.input_dim, np.hstack(Ylist))
|
||||
fracs = [fracs]*self.input_dim
|
||||
elif init in "PCA_single":
|
||||
X = np.zeros((Ylist[0].shape[0], self.input_dim))
|
||||
fracs = []
|
||||
for qs, Y in itertools.izip(np.array_split(np.arange(self.input_dim), len(Ylist)), Ylist):
|
||||
X[:, qs] = PCA(Y, len(qs))[0]
|
||||
x,frcs = initialize_latent('PCA', len(qs), Y)
|
||||
X[:, qs] = x
|
||||
fracs.append(frcs)
|
||||
else: # init == 'random':
|
||||
X = np.random.randn(Ylist[0].shape[0], self.input_dim)
|
||||
return X
|
||||
fracs = X.var(0)
|
||||
fracs = [fracs]*self.input_dim
|
||||
return X, fracs
|
||||
|
||||
def _init_Z(self, init="permute", X=None):
|
||||
if X is None:
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@ class Test(unittest.TestCase):
|
|||
self.assertListEqual(self.param_index[one].tolist(), [3])
|
||||
self.assertListEqual(self.param_index.remove('not in there', [2,3,4]).tolist(), [])
|
||||
|
||||
self.assertListEqual(self.view.remove('not in there', [2,3,4]).tolist(), [])
|
||||
|
||||
def test_shift_left(self):
|
||||
self.view.shift_left(0, 2)
|
||||
self.assertListEqual(self.param_index[three].tolist(), [2,5])
|
||||
|
|
@ -82,6 +84,10 @@ class Test(unittest.TestCase):
|
|||
self.assertEqual(self.param_index.size, 6)
|
||||
self.assertEqual(self.view.size, 5)
|
||||
|
||||
def test_print(self):
|
||||
print self.param_index
|
||||
print self.view
|
||||
|
||||
if __name__ == "__main__":
|
||||
#import sys;sys.argv = ['', 'Test.test_index_view']
|
||||
unittest.main()
|
||||
|
|
@ -8,6 +8,7 @@ import sys
|
|||
|
||||
verbose = 0
|
||||
|
||||
np.random.seed(50)
|
||||
|
||||
|
||||
class Kern_check_model(GPy.core.Model):
|
||||
|
|
@ -243,6 +244,17 @@ class KernelGradientTestsContinuous(unittest.TestCase):
|
|||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Prod2(self):
|
||||
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Prod3(self):
|
||||
k = GPy.kern.Matern32(2, active_dims=[2,3]) * (GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D))
|
||||
k = (GPy.kern.RBF(2, active_dims=[0,4]) * GPy.kern.Linear(self.D))
|
||||
k.randomize()
|
||||
self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose))
|
||||
|
||||
def test_Add(self):
|
||||
k = GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
||||
k += GPy.kern.Matern32(2, active_dims=[2,3]) + GPy.kern.RBF(2, active_dims=[0,4]) + GPy.kern.Linear(self.D)
|
||||
|
|
|
|||
|
|
@ -130,7 +130,14 @@ class MiscTests(unittest.TestCase):
|
|||
m2.kern[:] = m.kern[''].values()
|
||||
np.testing.assert_equal(m.log_likelihood(), m2.log_likelihood())
|
||||
|
||||
class GradientTests(unittest.TestCase):
|
||||
def test_model_optimize(self):
|
||||
X = np.random.uniform(-3., 3., (20, 1))
|
||||
Y = np.sin(X) + np.random.randn(20, 1) * 0.05
|
||||
m = GPy.models.GPRegression(X,Y)
|
||||
m.optimize()
|
||||
print m
|
||||
|
||||
class GradientTests(np.testing.TestCase):
|
||||
def setUp(self):
|
||||
######################################
|
||||
# # 1 dimensional example
|
||||
|
|
|
|||
|
|
@ -5,13 +5,15 @@ Created on 24 Feb 2014
|
|||
'''
|
||||
|
||||
import numpy as np
|
||||
from linalg import PCA
|
||||
from GPy.util.pca import pca
|
||||
|
||||
def initialize_latent(init, input_dim, Y):
|
||||
Xr = np.random.randn(Y.shape[0], input_dim)
|
||||
if init == 'PCA':
|
||||
PC = PCA(Y, input_dim)[0]
|
||||
p = pca(Y)
|
||||
PC = p.project(Y, min(input_dim, Y.shape[1]))
|
||||
Xr[:PC.shape[0], :PC.shape[1]] = PC
|
||||
else:
|
||||
pass
|
||||
return Xr
|
||||
var = Xr.var(0)
|
||||
return Xr, var/var.max()
|
||||
return Xr, p.fracs[:input_dim]
|
||||
|
|
@ -580,26 +580,3 @@ def backsub_both_sides(L, X, transpose='left'):
|
|||
tmp, _ = dtrtrs(L, X, lower=1, trans=0)
|
||||
return dtrtrs(L, tmp.T, lower=1, trans=0)[0].T
|
||||
|
||||
def PCA(Y, input_dim):
|
||||
"""
|
||||
Principal component analysis: maximum likelihood solution by SVD
|
||||
|
||||
:param Y: NxD np.array of data
|
||||
:param input_dim: int, dimension of projection
|
||||
|
||||
|
||||
:rval X: - Nxinput_dim np.array of dimensionality reduced data
|
||||
:rval W: - input_dimxD mapping from X to Y
|
||||
|
||||
"""
|
||||
if not np.allclose(Y.mean(axis=0), 0.0):
|
||||
print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)"
|
||||
|
||||
# Y -= Y.mean(axis=0)
|
||||
|
||||
Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
|
||||
[X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
|
||||
v = X.std(axis=0)
|
||||
X /= v;
|
||||
W *= v;
|
||||
return X, W.T
|
||||
|
|
|
|||
122
GPy/util/pca.py
Normal file
122
GPy/util/pca.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
'''
|
||||
Created on 10 Sep 2012
|
||||
|
||||
@author: Max Zwiessele
|
||||
@copyright: Max Zwiessele 2012
|
||||
'''
|
||||
import numpy
|
||||
import pylab
|
||||
import matplotlib
|
||||
from numpy.linalg.linalg import LinAlgError
|
||||
|
||||
class pca(object):
|
||||
"""
|
||||
pca module with automatic primal/dual determination.
|
||||
"""
|
||||
def __init__(self, X):
|
||||
self.mu = X.mean(0)
|
||||
self.sigma = X.std(0)
|
||||
|
||||
X = self.center(X)
|
||||
|
||||
# self.X = input
|
||||
if X.shape[0] >= X.shape[1]:
|
||||
# print "N >= D: using primal"
|
||||
self.eigvals, self.eigvectors = self._primal_eig(X)
|
||||
else:
|
||||
# print "N < D: using dual"
|
||||
self.eigvals, self.eigvectors = self._dual_eig(X)
|
||||
self.sort = numpy.argsort(self.eigvals)[::-1]
|
||||
self.eigvals = self.eigvals[self.sort]
|
||||
self.eigvectors = self.eigvectors[:, self.sort]
|
||||
self.fracs = self.eigvals / self.eigvals.sum()
|
||||
self.Q = self.eigvals.shape[0]
|
||||
|
||||
def center(self, X):
|
||||
"""
|
||||
Center `X` in pca space.
|
||||
"""
|
||||
X = X - self.mu
|
||||
X = X / numpy.where(self.sigma == 0, 1e-30, self.sigma)
|
||||
return X
|
||||
|
||||
def _primal_eig(self, X):
|
||||
return numpy.linalg.eigh(numpy.einsum('ji,jk->ik',X,X))
|
||||
|
||||
def _dual_eig(self, X):
|
||||
dual_eigvals, dual_eigvects = numpy.linalg.eigh(numpy.einsum('ij,kj->ik',X,X))
|
||||
relevant_dimensions = numpy.argsort(numpy.abs(dual_eigvals))[-X.shape[1]:]
|
||||
eigvals = dual_eigvals[relevant_dimensions]
|
||||
eigvects = dual_eigvects[:, relevant_dimensions]
|
||||
eigvects = (1. / numpy.sqrt(X.shape[0] * numpy.abs(eigvals))) * X.T.dot(eigvects)
|
||||
eigvects /= numpy.sqrt(numpy.diag(eigvects.T.dot(eigvects)))
|
||||
return eigvals, eigvects
|
||||
|
||||
def project(self, X, Q=None):
|
||||
"""
|
||||
Project X into pca space, defined by the Q highest eigenvalues.
|
||||
Y = X dot V
|
||||
"""
|
||||
if Q is None:
|
||||
Q = self.Q
|
||||
if Q > X.shape[1]:
|
||||
raise IndexError("requested dimension larger then input dimension")
|
||||
X = self.center(X)
|
||||
return X.dot(self.eigvectors[:, :Q])
|
||||
|
||||
def plot_fracs(self, Q=None, ax=None, fignum=None):
|
||||
"""
|
||||
Plot fractions of Eigenvalues sorted in descending order.
|
||||
"""
|
||||
if ax is None:
|
||||
fig = pylab.figure(fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
if Q is None:
|
||||
Q = self.Q
|
||||
ticks = numpy.arange(Q)
|
||||
bar = ax.bar(ticks - .4, self.fracs[:Q])
|
||||
ax.set_xticks(ticks, map(lambda x: r"${}$".format(x), ticks + 1))
|
||||
ax.set_ylabel("Eigenvalue fraction")
|
||||
ax.set_xlabel("PC")
|
||||
ax.set_ylim(0, ax.get_ylim()[1])
|
||||
ax.set_xlim(ticks.min() - .5, ticks.max() + .5)
|
||||
try:
|
||||
pylab.tight_layout()
|
||||
except:
|
||||
pass
|
||||
return bar
|
||||
|
||||
def plot_2d(self, X, labels=None, s=20, marker='o',
|
||||
dimensions=(0, 1), ax=None, colors=None,
|
||||
fignum=None, cmap=matplotlib.cm.jet, # @UndefinedVariable
|
||||
** kwargs):
|
||||
"""
|
||||
Plot dimensions `dimensions` with given labels against each other in
|
||||
PC space. Labels can be any sequence of labels of dimensions X.shape[0].
|
||||
Labels can be drawn with a subsequent call to legend()
|
||||
"""
|
||||
if ax is None:
|
||||
fig = pylab.figure(fignum)
|
||||
ax = fig.add_subplot(111)
|
||||
if labels is None:
|
||||
labels = numpy.zeros(X.shape[0])
|
||||
ulabels = []
|
||||
for lab in labels:
|
||||
if not lab in ulabels:
|
||||
ulabels.append(lab)
|
||||
nlabels = len(ulabels)
|
||||
if colors is None:
|
||||
colors = [cmap(float(i) / nlabels) for i in range(nlabels)]
|
||||
X_ = self.project(X, self.Q)[:,dimensions]
|
||||
kwargs.update(dict(s=s))
|
||||
plots = list()
|
||||
for i, l in enumerate(ulabels):
|
||||
kwargs.update(dict(color=colors[i], marker=marker[i % len(marker)]))
|
||||
plots.append(ax.scatter(*X_[labels == l, :].T, label=str(l), **kwargs))
|
||||
ax.set_xlabel(r"PC$_1$")
|
||||
ax.set_ylabel(r"PC$_2$")
|
||||
try:
|
||||
pylab.tight_layout()
|
||||
except:
|
||||
pass
|
||||
return plots
|
||||
Loading…
Add table
Add a link
Reference in a new issue