Merge branch 'devel' of github.com:SheffieldML/GPy into devel

Conflicts:
	GPy/core/fitc.py
This commit is contained in:
Ricardo 2013-06-05 16:37:57 +01:00
commit c774432fee
56 changed files with 783 additions and 807 deletions

View file

@ -14,7 +14,7 @@ class FITC(SparseGP):
sparse FITC approximation sparse FITC approximation
:param X: inputs :param X: inputs
:type X: np.ndarray (N x Q) :type X: np.ndarray (num_data x Q)
:param likelihood: a likelihood instance, containing the observed data :param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP) :type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel : the kernel (covariance function). See link kernels :param kernel : the kernel (covariance function). See link kernels
@ -57,7 +57,7 @@ class FITC(SparseGP):
self.V_star = self.beta_star * self.likelihood.Y self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A # The rather complex computations of self.A
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N))) tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
@ -113,7 +113,7 @@ class FITC(SparseGP):
self._dpsi1_dX_jkj = 0 self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0 self._dpsi1_dtheta_jkj = 0
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3): for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T) K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:] _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
@ -137,14 +137,14 @@ class FITC(SparseGP):
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1) aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V) aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise) dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T) dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star) alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha) alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise ) dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y) dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star) dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
@ -154,7 +154,7 @@ class FITC(SparseGP):
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) C = -self.output_dim * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D return A + C + D
@ -204,8 +204,8 @@ class FITC(SparseGP):
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T # Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T # C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T # = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V # = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)

View file

@ -33,8 +33,8 @@ class GP(GPBase):
self._set_params(self._get_params()) self._set_params(self._get_params())
def _set_params(self, p): def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()]) self.kern._set_params_transformed(p[:self.kern.num_params_transformed()])
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) self.likelihood._set_params(p[self.kern.num_params_transformed():])
self.K = self.kern.K(self.X) self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix self.K += self.likelihood.covariance_matrix
@ -46,12 +46,12 @@ class GP(GPBase):
#alpha = np.dot(self.Ki, self.likelihood.Y) #alpha = np.dot(self.Ki, self.likelihood.Y)
alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1) alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1)
self.dL_dK = 0.5 * (tdot(alpha) - self.input_dim * self.Ki) self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki)
else: else:
#tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
self.dL_dK = 0.5 * (tmp - self.input_dim * self.Ki) self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
def _get_params(self): def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))

View file

@ -1,24 +1,24 @@
import numpy as np import numpy as np
import model
from .. import kern from .. import kern
from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
import pylab as pb import pylab as pb
from GPy.core.model import Model
class GPBase(model.model): class GPBase(Model):
""" """
Gaussian Process model for holding shared behaviour between Gaussian Process Model for holding shared behaviour between
sprase_GP and GP models sprase_GP and GP models
""" """
def __init__(self, X, likelihood, kernel, normalize_X=False): def __init__(self, X, likelihood, kernel, normalize_X=False):
self.X = X self.X = X
assert len(self.X.shape) == 2 assert len(self.X.shape) == 2
self.N, self.input_dim = self.X.shape self.num_data, self.input_dim = self.X.shape
assert isinstance(kernel, kern.kern) assert isinstance(kernel, kern.kern)
self.kern = kernel self.kern = kernel
self.likelihood = likelihood self.likelihood = likelihood
assert self.X.shape[0] == self.likelihood.data.shape[0] assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.output_dim = self.likelihood.data.shape self.num_data, self.output_dim = self.likelihood.data.shape
if normalize_X: if normalize_X:
self._Xmean = X.mean(0)[None, :] self._Xmean = X.mean(0)[None, :]
@ -28,7 +28,7 @@ class GPBase(model.model):
self._Xmean = np.zeros((1, self.input_dim)) self._Xmean = np.zeros((1, self.input_dim))
self._Xstd = np.ones((1, self.input_dim)) self._Xstd = np.ones((1, self.input_dim))
model.model.__init__(self) Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at # All leaf nodes should call self._set_params(self._get_params()) at
# the end # the end
@ -84,8 +84,8 @@ class GPBase(model.model):
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution) Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts) m, v = self._raw_predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T m = m.reshape(resolution, resolution).T
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1]) ax.set_ylim(xmin[1], xmax[1])
else: else:
@ -94,9 +94,9 @@ class GPBase(model.model):
def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None): def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None):
""" """
TODO: Docstrings! TODO: Docstrings!
:param levels: for 2D plotting, the number of contour levels to use :param levels: for 2D plotting, the number of contour levels to use
is ax is None, create a new figure is ax is None, create a new figure
""" """
# TODO include samples # TODO include samples
if which_data == 'all': if which_data == 'all':
@ -111,7 +111,7 @@ class GPBase(model.model):
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts) m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
for d in range(m.shape[1]): for d in range(m.shape[1]):
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax)
ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5) ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5)
@ -122,13 +122,13 @@ class GPBase(model.model):
elif self.X.shape[1] == 2: # FIXME elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50 resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts) m, _, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T m = m.reshape(resolution, resolution).T
ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
Yf = self.likelihood.Y.flatten() Yf = self.likelihood.Y.flatten()
ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1]) ax.set_ylim(xmin[1], xmax[1])

View file

@ -6,37 +6,32 @@ from .. import likelihoods
from ..inference import optimization from ..inference import optimization
from ..util.linalg import jitchol from ..util.linalg import jitchol
from GPy.util.misc import opt_wrapper from GPy.util.misc import opt_wrapper
from parameterised import parameterised from parameterised import Parameterised
from scipy import optimize
import multiprocessing as mp import multiprocessing as mp
import numpy as np import numpy as np
import priors
import re
import sys
import pdb
from GPy.core.domains import POSITIVE, REAL from GPy.core.domains import POSITIVE, REAL
# import numdifftools as ndt # import numdifftools as ndt
class model(parameterised): class Model(Parameterised):
def __init__(self): def __init__(self):
parameterised.__init__(self) Parameterised.__init__(self)
self.priors = None self.priors = None
self.optimization_runs = [] self.optimization_runs = []
self.sampling_runs = [] self.sampling_runs = []
self.preferred_optimizer = 'scg' self.preferred_optimizer = 'scg'
# self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes # self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
def _get_params(self): def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the Model class"
def _set_params(self, x): def _set_params(self, x):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the Model class"
def log_likelihood(self): def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the Model class"
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the Model class"
def set_prior(self, regexp, what): def set_prior(self, regexp, what):
""" """
Sets priors on the model parameters. Sets priors on the Model parameters.
Arguments Arguments
--------- ---------
@ -95,7 +90,7 @@ class model(parameterised):
def get_gradient(self, name, return_names=False): def get_gradient(self, name, return_names=False):
""" """
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. Get Model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
""" """
matches = self.grep_param_names(name) matches = self.grep_param_names(name)
if len(matches): if len(matches):
@ -135,7 +130,7 @@ class model(parameterised):
def randomize(self): def randomize(self):
""" """
Randomize the model. Randomize the Model.
Make this draw from the Prior if one exists, else draw from N(0,1) Make this draw from the Prior if one exists, else draw from N(0,1)
""" """
# first take care of all parameters (from N(0,1)) # first take care of all parameters (from N(0,1))
@ -150,13 +145,13 @@ class model(parameterised):
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...) self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
""" """
Perform random restarts of the model, and set the model to the best Perform random restarts of the Model, and set the Model to the best
seen solution. seen solution.
If the robust flag is set, exceptions raised during optimizations will If the robust flag is set, exceptions raised during optimizations will
be handled silently. If _all_ runs fail, the model is reset to the be handled silently. If _all_ runs fail, the Model is reset to the
existing parameter values. existing parameter values.
Notes Notes
@ -179,7 +174,7 @@ class model(parameterised):
try: try:
jobs = [] jobs = []
pool = mp.Pool(processes=num_processes) pool = mp.Pool(processes=num_processes)
for i in range(Nrestarts): for i in range(num_restarts):
self.randomize() self.randomize()
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs) job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job) jobs.append(job)
@ -191,7 +186,7 @@ class model(parameterised):
pool.terminate() pool.terminate()
pool.join() pool.join()
for i in range(Nrestarts): for i in range(num_restarts):
try: try:
if not parallel: if not parallel:
self.randomize() self.randomize()
@ -200,10 +195,10 @@ class model(parameterised):
self.optimization_runs.append(jobs[i].get()) self.optimization_runs.append(jobs[i].get())
if verbose: if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt)) print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
except Exception as e: except Exception as e:
if robust: if robust:
print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts)) print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
else: else:
raise e raise e
@ -218,7 +213,7 @@ class model(parameterised):
Ensure that any variables which should clearly be positive have been constrained somehow. Ensure that any variables which should clearly be positive have been constrained somehow.
""" """
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa'] positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
param_names = self._get_param_names() # param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices() currently_constrained = self.all_constrained_indices()
to_make_positive = [] to_make_positive = []
for s in positive_strings: for s in positive_strings:
@ -251,7 +246,7 @@ class model(parameterised):
def optimize(self, optimizer=None, start=None, **kwargs): def optimize(self, optimizer=None, start=None, **kwargs):
""" """
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. Optimize the Model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be: kwargs are passed to the optimizer. They can be:
:max_f_eval: maximum number of function evaluations :max_f_eval: maximum number of function evaluations
@ -274,7 +269,7 @@ class model(parameterised):
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs): def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1" # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable
sgd.run() sgd.run()
self.optimization_runs.append(sgd) self.optimization_runs.append(sgd)
@ -291,7 +286,7 @@ class model(parameterised):
def f(x): def f(x):
self._set_params(x) self._set_params(x)
return self.log_likelihood() return self.log_likelihood()
h = ndt.Hessian(f) h = ndt.Hessian(f) # @UndefinedVariable
A = -h(x) A = -h(x)
self._set_params(x) self._set_params(x)
# check for almost zero components on the diagonal which screw up the cholesky # check for almost zero components on the diagonal which screw up the cholesky
@ -300,7 +295,7 @@ class model(parameterised):
return A return A
def Laplace_evidence(self): def Laplace_evidence(self):
"""Returns an estiamte of the model evidence based on the Laplace approximation. """Returns an estiamte of the Model evidence based on the Laplace approximation.
Uses a numerical estimate of the hessian if none is available analytically""" Uses a numerical estimate of the hessian if none is available analytically"""
A = self.Laplace_covariance() A = self.Laplace_covariance()
try: try:
@ -310,7 +305,7 @@ class model(parameterised):
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
def __str__(self): def __str__(self):
s = parameterised.__str__(self).split('\n') s = Parameterised.__str__(self).split('\n')
# add priors to the string # add priors to the string
if self.priors is not None: if self.priors is not None:
strs = [str(p) if p is not None else '' for p in self.priors] strs = [str(p) if p is not None else '' for p in self.priors]
@ -336,7 +331,7 @@ class model(parameterised):
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3): def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
""" """
Check the gradient of the model by comparing to a numerical estimate. Check the gradient of the Model by comparing to a numerical estimate.
If the verbose flag is passed, invividual components are tested (and printed) If the verbose flag is passed, invividual components are tested (and printed)
:param verbose: If True, print a "full" checking of each parameter :param verbose: If True, print a "full" checking of each parameter
@ -389,7 +384,7 @@ class model(parameterised):
param_list = range(len(x)) param_list = range(len(x))
else: else:
param_list = self.grep_param_names(target_param, transformed=True, search=True) param_list = self.grep_param_names(target_param, transformed=True, search=True)
if not param_list: if not np.any(param_list):
print "No free parameters to check" print "No free parameters to check"
return return
@ -419,15 +414,15 @@ class model(parameterised):
def input_sensitivity(self): def input_sensitivity(self):
""" """
return an array describing the sesitivity of the model to each input return an array describing the sesitivity of the Model to each input
NB. Right now, we're basing this on the lengthscales (or NB. Right now, we're basing this on the lengthscales (or
variances) of the kernel. TODO: proper sensitivity analysis variances) of the kernel. TODO: proper sensitivity analysis
where we integrate across the model inputs and evaluate the where we integrate across the Model inputs and evaluate the
effect on the variance of the model output. """ effect on the variance of the Model output. """
if not hasattr(self, 'kern'): if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel" raise ValueError, "this Model has no kernel"
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']] k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
if (not len(k) == 1) or (not k[0].ARD): if (not len(k) == 1) or (not k[0].ARD):
@ -475,7 +470,7 @@ class model(parameterised):
if ll_change < 0: if ll_change < 0:
self.likelihood = last_approximation # restore previous likelihood approximation self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters self._set_params(last_params) # restore Model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True stop = True
else: else:

View file

@ -6,12 +6,10 @@ import numpy as np
import re import re
import copy import copy
import cPickle import cPickle
import os
from ..util.squashers import sigmoid
import warnings import warnings
import transformations import transformations
class parameterised(object): class Parameterised(object):
def __init__(self): def __init__(self):
""" """
This is the base class for model and kernel. Mostly just handles tieing and constraining of parameters This is the base class for model and kernel. Mostly just handles tieing and constraining of parameters
@ -36,7 +34,7 @@ class parameterised(object):
""" """
Returns a **copy** of parameters in non transformed space Returns a **copy** of parameters in non transformed space
:see_also: :py:func:`GPy.core.parameterised.params_transformed` :see_also: :py:func:`GPy.core.Parameterised.params_transformed`
""" """
return self._get_params() return self._get_params()
@ -49,7 +47,7 @@ class parameterised(object):
""" """
Returns a **copy** of parameters in transformed space Returns a **copy** of parameters in transformed space
:see_also: :py:func:`GPy.core.parameterised.params` :see_also: :py:func:`GPy.core.Parameterised.params`
""" """
return self._get_params_transformed() return self._get_params_transformed()
@ -145,7 +143,7 @@ class parameterised(object):
else: else:
return np.nonzero([regexp.match(name) for name in names])[0] return np.nonzero([regexp.match(name) for name in names])[0]
def Nparam_transformed(self): def num_params_transformed(self):
removed = 0 removed = 0
for tie in self.tied_indices: for tie in self.tied_indices:
removed += tie.size - 1 removed += tie.size - 1
@ -292,7 +290,7 @@ class parameterised(object):
[np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)] [np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)]
if hasattr(self, 'debug'): if hasattr(self, 'debug'):
stop stop # @UndefinedVariable
return xx return xx
@ -354,7 +352,7 @@ class parameterised(object):
max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])]) max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])]) max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4 cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
columns = cols.sum() # columns = cols.sum()
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))] header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string]) header_string = map(lambda x: '|'.join(x), [header_string])

View file

@ -13,13 +13,13 @@ class SparseGP(GPBase):
Variational sparse GP model Variational sparse GP model
:param X: inputs :param X: inputs
:type X: np.ndarray (N x input_dim) :type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data :param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel : the kernel (covariance function). See link kernels :param kernel : the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance :type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance) :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (N x input_dim) | None :type X_variance: np.ndarray (num_data x input_dim) | None
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None :type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None) :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None)
@ -69,7 +69,7 @@ class SparseGP(GPBase):
# The rather complex computations of self.A # The rather complex computations of self.A
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0) psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
else: else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision psi2_beta = self.psi2.sum(0) * self.likelihood.precision
evals, evecs = linalg.eigh(psi2_beta) evals, evecs = linalg.eigh(psi2_beta)
@ -77,7 +77,7 @@ class SparseGP(GPBase):
tmp = evecs * np.sqrt(clipped_evals) tmp = evecs * np.sqrt(clipped_evals)
else: else:
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N))) tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.num_data)))
else: else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision)) tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
@ -99,28 +99,28 @@ class SparseGP(GPBase):
# Compute dL_dKmm # Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1V) tmp = tdot(self._LBi_Lmi_psi1V)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.input_dim * np.eye(self.num_inducing) + tmp) self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
tmp = -0.5 * self.DBi_plus_BiPBi tmp = -0.5 * self.DBi_plus_BiPBi
tmp += -0.5 * self.B * self.input_dim tmp += -0.5 * self.B * self.output_dim
tmp += self.input_dim * np.eye(self.num_inducing) tmp += self.output_dim * np.eye(self.num_inducing)
self.dL_dKmm = backsub_both_sides(self.Lm, tmp) self.dL_dKmm = backsub_both_sides(self.Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case
self.dL_dpsi0 = -0.5 * self.input_dim * (self.likelihood.precision * np.ones([self.N, 1])).flatten() self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T) self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.input_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi) dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :] self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else: else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N)) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.num_data))
self.dL_dpsi2 = None self.dL_dpsi2 = None
else: else:
dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
# repeat for each of the N psi_2 matrices # repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0) self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
else: else:
# subsume back into psi1 (==Kmn) # subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1)
@ -135,26 +135,26 @@ class SparseGP(GPBase):
raise NotImplementedError, "heteroscedatic derivates not implemented" raise NotImplementedError, "heteroscedatic derivates not implemented"
else: else:
# likelihood is not heterscedatic # likelihood is not heterscedatic
self.partial_for_likelihood = -0.5 * self.N * self.input_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.input_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5 * self.N * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + B + C + D + self.likelihood.Z return A + B + C + D + self.likelihood.Z
def _set_params(self, p): def _set_params(self, p):
self.Z = p[:self.num_inducing * self.output_dim].reshape(self.num_inducing, self.input_dim) self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
self._compute_kernel_matrices() self._compute_kernel_matrices()
self._computations() self._computations()

View file

@ -12,7 +12,7 @@ default_seed = np.random.seed(123344)
def BGPLVM(seed=default_seed): def BGPLVM(seed=default_seed):
N = 10 N = 10
M = 3 num_inducing = 3
Q = 2 Q = 2
D = 4 D = 4
# generate GPLVM-like data # generate GPLVM-like data
@ -26,7 +26,7 @@ def BGPLVM(seed=default_seed):
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
m = GPy.models.BayesianGPLVM(Y, Q, kernel=k, M=M) m = GPy.models.BayesianGPLVM(Y, Q, kernel=k, num_inducing=num_inducing)
m.constrain_positive('(rbf|bias|noise|white|S)') m.constrain_positive('(rbf|bias|noise|white|S)')
# m.constrain_fixed('S', 1) # m.constrain_fixed('S', 1)
@ -62,7 +62,7 @@ def GPLVM_oil_100(optimize=True):
m.plot_latent(labels=m.data_labels) m.plot_latent(labels=m.data_labels)
return m return m
def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False): def swiss_roll(optimize=True, N=1000, num_inducing=15, Q=4, sigma=.2, plot=False):
from GPy.util.datasets import swiss_roll_generated from GPy.util.datasets import swiss_roll_generated
from GPy.core.transformations import logexp_clipped from GPy.core.transformations import logexp_clipped
@ -100,11 +100,11 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2, S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2,
- (1 - var), - (1 - var),
(1 - var))) + .001 (1 - var))) + .001
Z = np.random.permutation(X)[:M] Z = np.random.permutation(X)[:num_inducing]
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2)) kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
m.data_colors = c m.data_colors = c
m.data_t = t m.data_t = t
@ -117,7 +117,7 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
m.optimize('scg', messages=1) m.optimize('scg', messages=1)
return m return m
def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k): def BGPLVM_oil(optimize=True, N=100, Q=5, num_inducing=25, max_f_eval=4e3, plot=False, **k):
np.random.seed(0) np.random.seed(0)
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
from GPy.core.transformations import logexp_clipped from GPy.core.transformations import logexp_clipped
@ -128,7 +128,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k)
Yn = Y - Y.mean(0) Yn = Y - Y.mean(0)
Yn /= Yn.std(0) Yn /= Yn.std(0)
m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, M=M, **k) m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
# m.constrain('variance|leng', logexp_clipped()) # m.constrain('variance|leng', logexp_clipped())
@ -167,7 +167,7 @@ def oil_100():
def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False): def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
x = np.linspace(0, 4 * np.pi, N)[:, None] x = np.linspace(0, 4 * np.pi, N)[:, None]
s1 = np.vectorize(lambda x: np.sin(x)) s1 = np.vectorize(lambda x: np.sin(x))
s2 = np.vectorize(lambda x: np.cos(x)) s2 = np.vectorize(lambda x: np.cos(x))
@ -227,13 +227,13 @@ def bgplvm_simulation_matlab_compare():
Y = sim_data['Y'] Y = sim_data['Y']
S = sim_data['S'] S = sim_data['S']
mu = sim_data['mu'] mu = sim_data['mu']
M, [_, Q] = 3, mu.shape num_inducing, [_, Q] = 3, mu.shape
from GPy.models import mrd from GPy.models import mrd
from GPy import kern from GPy import kern
reload(mrd); reload(kern) reload(mrd); reload(kern)
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k, m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k,
# X=mu, # X=mu,
# X_variance=S, # X_variance=S,
_debug=False) _debug=False)
@ -247,8 +247,8 @@ def bgplvm_simulation(optimize='scg',
plot=True, plot=True,
max_f_eval=2e4): max_f_eval=2e4):
# from GPy.core.transformations import logexp_clipped # from GPy.core.transformations import logexp_clipped
D1, D2, D3, N, M, Q = 15, 8, 8, 100, 3, 5 D1, D2, D3, N, num_inducing, Q = 15, 8, 8, 100, 3, 5
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot) slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot)
from GPy.models import mrd from GPy.models import mrd
from GPy import kern from GPy import kern
@ -258,7 +258,7 @@ def bgplvm_simulation(optimize='scg',
Y = Ylist[0] Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True)
# m.constrain('variance|noise', logexp_clipped()) # m.constrain('variance|noise', logexp_clipped())
m.ensure_default_constraints() m.ensure_default_constraints()
m['noise'] = Y.var() / 100. m['noise'] = Y.var() / 100.
@ -275,8 +275,8 @@ def bgplvm_simulation(optimize='scg',
return m return m
def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
D1, D2, D3, N, M, Q = 150, 200, 400, 500, 3, 7 D1, D2, D3, N, num_inducing, Q = 150, 200, 400, 500, 3, 7
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim) slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
from GPy.models import mrd from GPy.models import mrd
from GPy import kern from GPy import kern
@ -284,7 +284,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
reload(mrd); reload(kern) reload(mrd); reload(kern)
k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = mrd.MRD(Ylist, input_dim=Q, M=M, kernels=k, initx="", initz='permute', **kw) m = mrd.MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw)
for i, Y in enumerate(Ylist): for i, Y in enumerate(Ylist):
m['{}_noise'.format(i + 1)] = Y.var() / 100. m['{}_noise'.format(i + 1)] = Y.var() / 100.
@ -312,7 +312,7 @@ def brendan_faces():
Yn /= Yn.std() Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q) m = GPy.models.GPLVM(Yn, Q)
# m = GPy.models.BayesianGPLVM(Yn, Q, M=100) # m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100)
# optimize # optimize
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
@ -376,16 +376,16 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True):
# X /= X.std(axis=0) # X /= X.std(axis=0)
# #
# Q = 10 # Q = 10
# M = 30 # num_inducing = 30
# #
# kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) # kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q)
# m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, M=M) # m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, num_inducing=num_inducing)
# # m.scale_factor = 100.0 # # m.scale_factor = 100.0
# m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)') # m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)')
# from sklearn import cluster # from sklearn import cluster
# km = cluster.KMeans(M, verbose=10) # km = cluster.KMeans(num_inducing, verbose=10)
# Z = km.fit(m.X).cluster_centers_ # Z = km.fit(m.X).cluster_centers_
# # Z = GPy.util.misc.kmm_init(m.X, M) # # Z = GPy.util.misc.kmm_init(m.X, num_inducing)
# m.set('iip', Z) # m.set('iip', Z)
# m.set('bias', 1e-4) # m.set('bias', 1e-4)
# # optimize # # optimize

View file

@ -10,16 +10,16 @@ import numpy as np
import GPy import GPy
def toy_rbf_1d(max_nb_eval_optim=100): def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d() data = GPy.util.datasets.toy_rbf_1d()
# create simple GP model # create simple GP Model
m = GPy.models.GPRegression(data['X'],data['Y']) m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize # optimize
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize(max_f_eval=max_nb_eval_optim) m.optimize(optimizer, max_f_eval=max_nb_eval_optim)
# plot # plot
m.plot() m.plot()
print(m) print(m)
@ -29,7 +29,7 @@ def rogers_girolami_olympics(optim_iters=100):
"""Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" """Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
data = GPy.util.datasets.rogers_girolami_olympics() data = GPy.util.datasets.rogers_girolami_olympics()
# create simple GP model # create simple GP Model
m = GPy.models.GPRegression(data['X'],data['Y']) m = GPy.models.GPRegression(data['X'],data['Y'])
#set the lengthscale to be something sensible (defaults to 1) #set the lengthscale to be something sensible (defaults to 1)
@ -48,7 +48,7 @@ def toy_rbf_1d_50(optim_iters=100):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d_50() data = GPy.util.datasets.toy_rbf_1d_50()
# create simple GP model # create simple GP Model
m = GPy.models.GPRegression(data['X'],data['Y']) m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize # optimize
@ -64,7 +64,7 @@ def silhouette(optim_iters=100):
"""Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper.""" """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
data = GPy.util.datasets.silhouette() data = GPy.util.datasets.silhouette()
# create simple GP model # create simple GP Model
m = GPy.models.GPRegression(data['X'],data['Y']) m = GPy.models.GPRegression(data['X'],data['Y'])
# optimize # optimize
@ -151,8 +151,8 @@ def coregionalisation_sparse(optim_iters=100):
Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05 Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05
Y = np.vstack((Y1,Y2)) Y = np.vstack((Y1,Y2))
M = 40 num_inducing = 40
Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None])) Z = np.hstack((np.random.rand(num_inducing,1)*8,np.random.randint(0,2,num_inducing)[:,None]))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.rbf(1)
k2 = GPy.kern.Coregionalise(2,2) k2 = GPy.kern.Coregionalise(2,2)
@ -244,24 +244,24 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
lls = [] lls = []
total_var = np.var(data['Y']) total_var = np.var(data['Y'])
kernel = kernel_call(1, variance=1., lengthscale=1.) kernel = kernel_call(1, variance=1., lengthscale=1.)
model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel) Model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel)
for log_SNR in log_SNRs: for log_SNR in log_SNRs:
SNR = 10.**log_SNR SNR = 10.**log_SNR
noise_var = total_var/(1.+SNR) noise_var = total_var/(1.+SNR)
signal_var = total_var - noise_var signal_var = total_var - noise_var
model.kern['.*variance'] = signal_var Model.kern['.*variance'] = signal_var
model['noise_variance'] = noise_var Model['noise_variance'] = noise_var
length_scale_lls = [] length_scale_lls = []
for length_scale in length_scales: for length_scale in length_scales:
model['.*lengthscale'] = length_scale Model['.*lengthscale'] = length_scale
length_scale_lls.append(model.log_likelihood()) length_scale_lls.append(Model.log_likelihood())
lls.append(length_scale_lls) lls.append(length_scale_lls)
return np.array(lls) return np.array(lls)
def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100): def sparse_GP_regression_1D(N = 400, num_inducing = 5, optim_iters=100):
"""Run a 1D example of a sparse GP regression.""" """Run a 1D example of a sparse GP regression."""
# sample inputs and outputs # sample inputs and outputs
X = np.random.uniform(-3.,3.,(N,1)) X = np.random.uniform(-3.,3.,(N,1))
@ -270,8 +270,8 @@ def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100):
rbf = GPy.kern.rbf(1) rbf = GPy.kern.rbf(1)
noise = GPy.kern.white(1) noise = GPy.kern.white(1)
kernel = rbf + noise kernel = rbf + noise
# create simple GP model # create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel, M=M) m = GPy.models.SparseGPRegression(X, Y, kernel, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -280,7 +280,7 @@ def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100):
m.plot() m.plot()
return m return m
def sparse_GP_regression_2D(N = 400, M = 50, optim_iters=100): def sparse_GP_regression_2D(N = 400, num_inducing = 50, optim_iters=100):
"""Run a 2D example of a sparse GP regression.""" """Run a 2D example of a sparse GP regression."""
X = np.random.uniform(-3.,3.,(N,2)) X = np.random.uniform(-3.,3.,(N,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05 Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05
@ -290,8 +290,8 @@ def sparse_GP_regression_2D(N = 400, M = 50, optim_iters=100):
noise = GPy.kern.white(2) noise = GPy.kern.white(2)
kernel = rbf + noise kernel = rbf + noise
# create simple GP model # create simple GP Model
m = GPy.models.SparseGPRegression(X,Y,kernel, M = M) m = GPy.models.SparseGPRegression(X,Y,kernel, num_inducing = num_inducing)
# contrain all parameters to be positive (but not inducing inputs) # contrain all parameters to be positive (but not inducing inputs)
m.ensure_default_constraints() m.ensure_default_constraints()
@ -318,7 +318,7 @@ def uncertain_inputs_sparse_regression(optim_iters=100):
k = GPy.kern.rbf(1) + GPy.kern.white(1) k = GPy.kern.rbf(1) + GPy.kern.white(1)
# create simple GP model - no input uncertainty on this one # create simple GP Model - no input uncertainty on this one
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=optim_iters) m.optimize('scg', messages=1, max_f_eval=optim_iters)
@ -326,7 +326,7 @@ def uncertain_inputs_sparse_regression(optim_iters=100):
axes[0].set_title('no input uncertainty') axes[0].set_title('no input uncertainty')
#the same model with uncertainty #the same Model with uncertainty
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S) m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S)
m.ensure_default_constraints() m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=optim_iters) m.optimize('scg', messages=1, max_f_eval=optim_iters)

View file

@ -33,7 +33,7 @@ def tuto_GP_regression():
m.optimize() m.optimize()
m.optimize_restarts(Nrestarts = 10) m.optimize_restarts(num_restarts = 10)
########################### ###########################
# 2-dimensional example # # 2-dimensional example #

View file

@ -11,17 +11,17 @@ class opt_SGD(Optimizer):
Optimize using stochastic gradient descent. Optimize using stochastic gradient descent.
*** Parameters *** *** Parameters ***
model: reference to the model object Model: reference to the Model object
iterations: number of iterations iterations: number of iterations
learning_rate: learning rate learning_rate: learning rate
momentum: momentum momentum: momentum
""" """
def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs): def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, Model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs):
self.opt_name = "Stochastic Gradient Descent" self.opt_name = "Stochastic Gradient Descent"
self.model = model self.Model = Model
self.iterations = iterations self.iterations = iterations
self.momentum = momentum self.momentum = momentum
self.learning_rate = learning_rate self.learning_rate = learning_rate
@ -42,17 +42,17 @@ class opt_SGD(Optimizer):
self.learning_rate_0 = self.learning_rate.mean() self.learning_rate_0 = self.learning_rate.mean()
self.schedule = schedule self.schedule = schedule
# if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1: # if len([p for p in self.Model.kern.parts if p.name == 'bias']) == 1:
# self.param_traces.append(('bias',[])) # self.param_traces.append(('bias',[]))
# if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1: # if len([p for p in self.Model.kern.parts if p.name == 'linear']) == 1:
# self.param_traces.append(('linear',[])) # self.param_traces.append(('linear',[]))
# if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1: # if len([p for p in self.Model.kern.parts if p.name == 'rbf']) == 1:
# self.param_traces.append(('rbf_var',[])) # self.param_traces.append(('rbf_var',[]))
self.param_traces = dict(self.param_traces) self.param_traces = dict(self.param_traces)
self.fopt_trace = [] self.fopt_trace = []
num_params = len(self.model._get_params()) num_params = len(self.Model._get_params())
if isinstance(self.learning_rate, float): if isinstance(self.learning_rate, float):
self.learning_rate = np.ones((num_params,)) * self.learning_rate self.learning_rate = np.ones((num_params,)) * self.learning_rate
@ -84,7 +84,7 @@ class opt_SGD(Optimizer):
return (np.isnan(data).sum(axis=1) == 0) return (np.isnan(data).sum(axis=1) == 0)
def check_for_missing(self, data): def check_for_missing(self, data):
if sp.sparse.issparse(self.model.likelihood.Y): if sp.sparse.issparse(self.Model.likelihood.Y):
return True return True
else: else:
return np.isnan(data).sum() > 0 return np.isnan(data).sum() > 0
@ -107,32 +107,32 @@ class opt_SGD(Optimizer):
def shift_constraints(self, j): def shift_constraints(self, j):
constrained_indices = copy.deepcopy(self.model.constrained_indices) constrained_indices = copy.deepcopy(self.Model.constrained_indices)
for c, constraint in enumerate(constrained_indices): for c, constraint in enumerate(constrained_indices):
mask = (np.ones_like(constrained_indices[c]) == 1) mask = (np.ones_like(constrained_indices[c]) == 1)
for i in range(len(constrained_indices[c])): for i in range(len(constrained_indices[c])):
pos = np.where(j == constrained_indices[c][i])[0] pos = np.where(j == constrained_indices[c][i])[0]
if len(pos) == 1: if len(pos) == 1:
self.model.constrained_indices[c][i] = pos self.Model.constrained_indices[c][i] = pos
else: else:
mask[i] = False mask[i] = False
self.model.constrained_indices[c] = self.model.constrained_indices[c][mask] self.Model.constrained_indices[c] = self.Model.constrained_indices[c][mask]
return constrained_indices return constrained_indices
# back them up # back them up
# bounded_i = copy.deepcopy(self.model.constrained_bounded_indices) # bounded_i = copy.deepcopy(self.Model.constrained_bounded_indices)
# bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers) # bounded_l = copy.deepcopy(self.Model.constrained_bounded_lowers)
# bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers) # bounded_u = copy.deepcopy(self.Model.constrained_bounded_uppers)
# for b in range(len(bounded_i)): # for each group of constraints # for b in range(len(bounded_i)): # for each group of constraints
# for bc in range(len(bounded_i[b])): # for bc in range(len(bounded_i[b])):
# pos = np.where(j == bounded_i[b][bc])[0] # pos = np.where(j == bounded_i[b][bc])[0]
# if len(pos) == 1: # if len(pos) == 1:
# pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0] # pos2 = np.where(self.Model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
# self.model.constrained_bounded_indices[b][pos2] = pos[0] # self.Model.constrained_bounded_indices[b][pos2] = pos[0]
# else: # else:
# if len(self.model.constrained_bounded_indices[b]) == 1: # if len(self.Model.constrained_bounded_indices[b]) == 1:
# # if it's the last index to be removed # # if it's the last index to be removed
# # the logic here is just a mess. If we remove the last one, then all the # # the logic here is just a mess. If we remove the last one, then all the
# # b-indices change and we have to iterate through everything to find our # # b-indices change and we have to iterate through everything to find our
@ -140,35 +140,35 @@ class opt_SGD(Optimizer):
# raise NotImplementedError # raise NotImplementedError
# else: # just remove it from the indices # else: # just remove it from the indices
# mask = self.model.constrained_bounded_indices[b] != bc # mask = self.Model.constrained_bounded_indices[b] != bc
# self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask] # self.Model.constrained_bounded_indices[b] = self.Model.constrained_bounded_indices[b][mask]
# # here we shif the positive constraints. We cycle through each positive # # here we shif the positive constraints. We cycle through each positive
# # constraint # # constraint
# positive = self.model.constrained_positive_indices.copy() # positive = self.Model.constrained_positive_indices.copy()
# mask = (np.ones_like(positive) == 1) # mask = (np.ones_like(positive) == 1)
# for p in range(len(positive)): # for p in range(len(positive)):
# # we now check whether the constrained index appears in the j vector # # we now check whether the constrained index appears in the j vector
# # (the vector of the "active" indices) # # (the vector of the "active" indices)
# pos = np.where(j == self.model.constrained_positive_indices[p])[0] # pos = np.where(j == self.Model.constrained_positive_indices[p])[0]
# if len(pos) == 1: # if len(pos) == 1:
# self.model.constrained_positive_indices[p] = pos # self.Model.constrained_positive_indices[p] = pos
# else: # else:
# mask[p] = False # mask[p] = False
# self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask] # self.Model.constrained_positive_indices = self.Model.constrained_positive_indices[mask]
# return (bounded_i, bounded_l, bounded_u), positive # return (bounded_i, bounded_l, bounded_u), positive
def restore_constraints(self, c):#b, p): def restore_constraints(self, c):#b, p):
# self.model.constrained_bounded_indices = b[0] # self.Model.constrained_bounded_indices = b[0]
# self.model.constrained_bounded_lowers = b[1] # self.Model.constrained_bounded_lowers = b[1]
# self.model.constrained_bounded_uppers = b[2] # self.Model.constrained_bounded_uppers = b[2]
# self.model.constrained_positive_indices = p # self.Model.constrained_positive_indices = p
self.model.constrained_indices = c self.Model.constrained_indices = c
def get_param_shapes(self, N = None, input_dim = None): def get_param_shapes(self, N = None, input_dim = None):
model_name = self.model.__class__.__name__ model_name = self.Model.__class__.__name__
if model_name == 'GPLVM': if model_name == 'GPLVM':
return [(N, input_dim)] return [(N, input_dim)]
if model_name == 'Bayesian_GPLVM': if model_name == 'Bayesian_GPLVM':
@ -179,37 +179,37 @@ class opt_SGD(Optimizer):
def step_with_missing_data(self, f_fp, X, step, shapes): def step_with_missing_data(self, f_fp, X, step, shapes):
N, input_dim = X.shape N, input_dim = X.shape
if not sp.sparse.issparse(self.model.likelihood.Y): if not sp.sparse.issparse(self.Model.likelihood.Y):
Y = self.model.likelihood.Y Y = self.Model.likelihood.Y
samples = self.non_null_samples(self.model.likelihood.Y) samples = self.non_null_samples(self.Model.likelihood.Y)
self.model.N = samples.sum() self.Model.N = samples.sum()
Y = Y[samples] Y = Y[samples]
else: else:
samples = self.model.likelihood.Y.nonzero()[0] samples = self.Model.likelihood.Y.nonzero()[0]
self.model.N = len(samples) self.Model.N = len(samples)
Y = np.asarray(self.model.likelihood.Y[samples].todense(), dtype = np.float64) Y = np.asarray(self.Model.likelihood.Y[samples].todense(), dtype = np.float64)
if self.model.N == 0 or Y.std() == 0.0: if self.Model.N == 0 or Y.std() == 0.0:
return 0, step, self.model.N return 0, step, self.Model.N
self.model.likelihood._offset = Y.mean() self.Model.likelihood._offset = Y.mean()
self.model.likelihood._scale = Y.std() self.Model.likelihood._scale = Y.std()
self.model.likelihood.set_data(Y) self.Model.likelihood.set_data(Y)
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision # self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision
sigma = self.model.likelihood._variance sigma = self.Model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache self.Model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma) self.Model.likelihood._set_params(sigma)
j = self.subset_parameter_vector(self.x_opt, samples, shapes) j = self.subset_parameter_vector(self.x_opt, samples, shapes)
self.model.X = X[samples] self.Model.X = X[samples]
model_name = self.model.__class__.__name__ model_name = self.Model.__class__.__name__
if model_name == 'Bayesian_GPLVM': if model_name == 'Bayesian_GPLVM':
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T)
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT) self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT)
ci = self.shift_constraints(j) ci = self.shift_constraints(j)
f, fp = f_fp(self.x_opt[j]) f, fp = f_fp(self.x_opt[j])
@ -218,18 +218,18 @@ class opt_SGD(Optimizer):
self.x_opt[j] -= step[j] self.x_opt[j] -= step[j]
self.restore_constraints(ci) self.restore_constraints(ci)
self.model.grads[j] = fp self.Model.grads[j] = fp
# restore likelihood _offset and _scale, otherwise when we call set_data(y) on # restore likelihood _offset and _scale, otherwise when we call set_data(y) on
# the next feature, it will get normalized with the mean and std of this one. # the next feature, it will get normalized with the mean and std of this one.
self.model.likelihood._offset = 0 self.Model.likelihood._offset = 0
self.model.likelihood._scale = 1 self.Model.likelihood._scale = 1
return f, step, self.model.N return f, step, self.Model.N
def adapt_learning_rate(self, t, D): def adapt_learning_rate(self, t, D):
if self.learning_rate_adaptation == 'adagrad': if self.learning_rate_adaptation == 'adagrad':
if t > 0: if t > 0:
g_k = self.model.grads g_k = self.Model.grads
self.s_k += np.square(g_k) self.s_k += np.square(g_k)
t0 = 100.0 t0 = 100.0
self.learning_rate = 0.1/(t0 + np.sqrt(self.s_k)) self.learning_rate = 0.1/(t0 + np.sqrt(self.s_k))
@ -245,8 +245,8 @@ class opt_SGD(Optimizer):
elif self.learning_rate_adaptation == 'semi_pesky': elif self.learning_rate_adaptation == 'semi_pesky':
if self.model.__class__.__name__ == 'Bayesian_GPLVM': if self.Model.__class__.__name__ == 'Bayesian_GPLVM':
g_t = self.model.grads g_t = self.Model.grads
if t == 0: if t == 0:
self.hbar_t = 0.0 self.hbar_t = 0.0
self.tau_t = 100.0 self.tau_t = 100.0
@ -259,28 +259,28 @@ class opt_SGD(Optimizer):
def opt(self, f_fp=None, f=None, fp=None): def opt(self, f_fp=None, f=None, fp=None):
self.x_opt = self.model._get_params_transformed() self.x_opt = self.Model._get_params_transformed()
self.grads = [] self.grads = []
X, Y = self.model.X.copy(), self.model.likelihood.Y.copy() X, Y = self.Model.X.copy(), self.Model.likelihood.Y.copy()
self.model.likelihood.YYT = 0 self.Model.likelihood.YYT = 0
self.model.likelihood.trYYT = 0 self.Model.likelihood.trYYT = 0
self.model.likelihood._offset = 0.0 self.Model.likelihood._offset = 0.0
self.model.likelihood._scale = 1.0 self.Model.likelihood._scale = 1.0
N, input_dim = self.model.X.shape N, input_dim = self.Model.X.shape
D = self.model.likelihood.Y.shape[1] D = self.Model.likelihood.Y.shape[1]
num_params = self.model._get_params() num_params = self.Model._get_params()
self.trace = [] self.trace = []
missing_data = self.check_for_missing(self.model.likelihood.Y) missing_data = self.check_for_missing(self.Model.likelihood.Y)
step = np.zeros_like(num_params) step = np.zeros_like(num_params)
for it in range(self.iterations): for it in range(self.iterations):
if self.actual_iter != None: if self.actual_iter != None:
it = self.actual_iter it = self.actual_iter
self.model.grads = np.zeros_like(self.x_opt) # TODO this is ugly self.Model.grads = np.zeros_like(self.x_opt) # TODO this is ugly
if it == 0 or self.self_paced is False: if it == 0 or self.self_paced is False:
features = np.random.permutation(Y.shape[1]) features = np.random.permutation(Y.shape[1])
@ -292,29 +292,29 @@ class opt_SGD(Optimizer):
NLL = [] NLL = []
import pylab as plt import pylab as plt
for count, j in enumerate(features): for count, j in enumerate(features):
self.model.input_dim = len(j) self.Model.input_dim = len(j)
self.model.likelihood.input_dim = len(j) self.Model.likelihood.input_dim = len(j)
self.model.likelihood.set_data(Y[:, j]) self.Model.likelihood.set_data(Y[:, j])
# self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision # self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision
sigma = self.model.likelihood._variance sigma = self.Model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache self.Model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma) self.Model.likelihood._set_params(sigma)
if missing_data: if missing_data:
shapes = self.get_param_shapes(N, input_dim) shapes = self.get_param_shapes(N, input_dim)
f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes) f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes)
else: else:
self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T)
self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT) self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT)
Nj = N Nj = N
f, fp = f_fp(self.x_opt) f, fp = f_fp(self.x_opt)
self.model.grads = fp.copy() self.Model.grads = fp.copy()
step = self.momentum * step + self.learning_rate * fp step = self.momentum * step + self.learning_rate * fp
self.x_opt -= step self.x_opt -= step
if self.messages == 2: if self.messages == 2:
noise = self.model.likelihood._variance noise = self.Model.likelihood._variance
status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise) status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise)
sys.stdout.write(status) sys.stdout.write(status)
sys.stdout.flush() sys.stdout.flush()
@ -328,19 +328,19 @@ class opt_SGD(Optimizer):
# plt.plot(self.param_traces['noise']) # plt.plot(self.param_traces['noise'])
# for k in self.param_traces.keys(): # for k in self.param_traces.keys():
# self.param_traces[k].append(self.model.get(k)[0]) # self.param_traces[k].append(self.Model.get(k)[0])
self.grads.append(self.model.grads.tolist()) self.grads.append(self.Model.grads.tolist())
# should really be a sum(), but earlier samples in the iteration will have a very crappy ll # should really be a sum(), but earlier samples in the iteration will have a very crappy ll
self.f_opt = np.mean(NLL) self.f_opt = np.mean(NLL)
self.model.N = N self.Model.N = N
self.model.X = X self.Model.X = X
self.model.input_dim = D self.Model.input_dim = D
self.model.likelihood.N = N self.Model.likelihood.N = N
self.model.likelihood.input_dim = D self.Model.likelihood.input_dim = D
self.model.likelihood.Y = Y self.Model.likelihood.Y = Y
sigma = self.model.likelihood._variance sigma = self.Model.likelihood._variance
self.model.likelihood._variance = None # invalidate cache self.Model.likelihood._variance = None # invalidate cache
self.model.likelihood._set_params(sigma) self.Model.likelihood._set_params(sigma)
self.trace.append(self.f_opt) self.trace.append(self.f_opt)
if self.iteration_file is not None: if self.iteration_file is not None:

View file

@ -2,14 +2,14 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
def theta(x): def theta(x):
"""Heavisdie step function""" """Heavisdie step function"""
return np.where(x>=0.,1.,0.) return np.where(x>=0.,1.,0.)
class Brownian(kernpart): class Brownian(Kernpart):
""" """
Brownian Motion kernel. Brownian Motion kernel.
@ -21,7 +21,7 @@ class Brownian(kernpart):
def __init__(self,input_dim,variance=1.): def __init__(self,input_dim,variance=1.):
self.input_dim = input_dim self.input_dim = input_dim
assert self.input_dim==1, "Brownian motion in 1D only" assert self.input_dim==1, "Brownian motion in 1D only"
self.Nparam = 1. self.num_params = 1.
self.name = 'Brownian' self.name = 'Brownian'
self._set_params(np.array([variance]).flatten()) self._set_params(np.array([variance]).flatten())

View file

@ -2,13 +2,11 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib
from ..util.linalg import pdinv,mdot
from scipy import integrate from scipy import integrate
class Matern32(kernpart): class Matern32(Kernpart):
""" """
Matern 3/2 kernel: Matern 3/2 kernel:
@ -32,7 +30,7 @@ class Matern32(kernpart):
self.input_dim = input_dim self.input_dim = input_dim
self.ARD = ARD self.ARD = ARD
if ARD == False: if ARD == False:
self.Nparam = 2 self.num_params = 2
self.name = 'Mat32' self.name = 'Mat32'
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
@ -40,7 +38,7 @@ class Matern32(kernpart):
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
self.Nparam = self.input_dim + 1 self.num_params = self.input_dim + 1
self.name = 'Mat32' self.name = 'Mat32'
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
@ -55,13 +53,13 @@ class Matern32(kernpart):
def _set_params(self, x): def _set_params(self, x):
"""set the value of the parameters.""" """set the value of the parameters."""
assert x.size == self.Nparam assert x.size == self.num_params
self.variance = x[0] self.variance = x[0]
self.lengthscale = x[1:] self.lengthscale = x[1:]
def _get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
if self.Nparam == 2: if self.num_params == 2:
return ['variance', 'lengthscale'] return ['variance', 'lengthscale']
else: else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]

View file

@ -2,12 +2,12 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib import hashlib
from scipy import integrate from scipy import integrate
class Matern52(kernpart): class Matern52(Kernpart):
""" """
Matern 5/2 kernel: Matern 5/2 kernel:
@ -30,7 +30,7 @@ class Matern52(kernpart):
self.input_dim = input_dim self.input_dim = input_dim
self.ARD = ARD self.ARD = ARD
if ARD == False: if ARD == False:
self.Nparam = 2 self.num_params = 2
self.name = 'Mat52' self.name = 'Mat52'
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
@ -38,7 +38,7 @@ class Matern52(kernpart):
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
self.Nparam = self.input_dim + 1 self.num_params = self.input_dim + 1
self.name = 'Mat52' self.name = 'Mat52'
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
@ -53,13 +53,13 @@ class Matern52(kernpart):
def _set_params(self,x): def _set_params(self,x):
"""set the value of the parameters.""" """set the value of the parameters."""
assert x.size == self.Nparam assert x.size == self.num_params
self.variance = x[0] self.variance = x[0]
self.lengthscale = x[1:] self.lengthscale = x[1:]
def _get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
if self.Nparam == 2: if self.num_params == 2:
return ['variance','lengthscale'] return ['variance','lengthscale']
else: else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, Fixed, rbfcos, IndependentOutputs
try: try:
from constructors import rbf_sympy, sympykern # these depend on sympy from constructors import rbf_sympy, sympykern # these depend on sympy
except: except:

View file

@ -2,11 +2,11 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib import hashlib
class bias(kernpart): class bias(Kernpart):
def __init__(self,input_dim,variance=1.): def __init__(self,input_dim,variance=1.):
""" """
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
@ -15,7 +15,7 @@ class bias(kernpart):
:type variance: float :type variance: float
""" """
self.input_dim = input_dim self.input_dim = input_dim
self.Nparam = 1 self.num_params = 1
self.name = 'bias' self.name = 'bias'
self._set_params(np.array([variance]).flatten()) self._set_params(np.array([variance]).flatten())

View file

@ -12,7 +12,7 @@ from exponential import exponential as exponentialpart
from Matern32 import Matern32 as Matern32part from Matern32 import Matern32 as Matern32part
from Matern52 import Matern52 as Matern52part from Matern52 import Matern52 as Matern52part
from bias import bias as biaspart from bias import bias as biaspart
from fixed import fixed as fixedpart from fixed import Fixed as fixedpart
from finite_dimensional import finite_dimensional as finite_dimensionalpart from finite_dimensional import finite_dimensional as finite_dimensionalpart
from spline import spline as splinepart from spline import spline as splinepart
from Brownian import Brownian as Brownianpart from Brownian import Brownian as Brownianpart
@ -24,7 +24,7 @@ from symmetric import symmetric as symmetric_part
from coregionalise import Coregionalise as coregionalise_part from coregionalise import Coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart from rational_quadratic import rational_quadratic as rational_quadraticpart
from rbfcos import rbfcos as rbfcospart from rbfcos import rbfcos as rbfcospart
from independent_outputs import independent_outputs as independent_output_part from independent_outputs import IndependentOutputs as independent_output_part
#TODO these s=constructors are not as clean as we'd like. Tidy the code up #TODO these s=constructors are not as clean as we'd like. Tidy the code up
#using meta-classes to make the objects construct properly wthout them. #using meta-classes to make the objects construct properly wthout them.
@ -294,9 +294,9 @@ def rational_quadratic(D,variance=1., lengthscale=1., power=1.):
part = rational_quadraticpart(D,variance, lengthscale, power) part = rational_quadraticpart(D,variance, lengthscale, power)
return kern(D, [part]) return kern(D, [part])
def fixed(D, K, variance=1.): def Fixed(D, K, variance=1.):
""" """
Construct a fixed effect kernel. Construct a Fixed effect kernel.
Arguments Arguments
--------- ---------
@ -314,7 +314,7 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
part = rbfcospart(D,variance,frequencies,bandwidths,ARD) part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
return kern(D,[part]) return kern(D,[part])
def independent_outputs(k): def IndependentOutputs(k):
""" """
Construct a kernel with independent outputs from an existing kernel Construct a kernel with independent outputs from an existing kernel
""" """

View file

@ -1,13 +1,13 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade # Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
from GPy.util.linalg import mdot, pdinv from GPy.util.linalg import mdot, pdinv
import pdb import pdb
from scipy import weave from scipy import weave
class Coregionalise(kernpart): class Coregionalise(Kernpart):
""" """
Kernel for Intrinsic Corregionalization Models Kernel for Intrinsic Corregionalization Models
""" """
@ -26,14 +26,14 @@ class Coregionalise(kernpart):
else: else:
assert kappa.shape==(self.Nout,) assert kappa.shape==(self.Nout,)
self.kappa = kappa self.kappa = kappa
self.Nparam = self.Nout*(self.R + 1) self.num_params = self.Nout*(self.R + 1)
self._set_params(np.hstack([self.W.flatten(),self.kappa])) self._set_params(np.hstack([self.W.flatten(),self.kappa]))
def _get_params(self): def _get_params(self):
return np.hstack([self.W.flatten(),self.kappa]) return np.hstack([self.W.flatten(),self.kappa])
def _set_params(self,x): def _set_params(self,x):
assert x.size == self.Nparam assert x.size == self.num_params
self.kappa = x[-self.Nout:] self.kappa = x[-self.Nout:]
self.W = x[:-self.Nout].reshape(self.Nout,self.R) self.W = x[:-self.Nout].reshape(self.Nout,self.R)
self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa) self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa)
@ -69,14 +69,14 @@ class Coregionalise(kernpart):
else: else:
index2 = np.asarray(index2,dtype=np.int) index2 = np.asarray(index2,dtype=np.int)
code=""" code="""
for(int i=0;i<M; i++){ for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){ for(int j=0; j<N; j++){
target[i+j*M] += B[Nout*index[j]+index2[i]]; target[i+j*num_inducing] += B[Nout*index[j]+index2[i]];
} }
} }
""" """
N,M,B,Nout = index.size,index2.size, self.B, self.Nout N,num_inducing,B,Nout = index.size,index2.size, self.B, self.Nout
weave.inline(code,['target','index','index2','N','M','B','Nout']) weave.inline(code,['target','index','index2','N','num_inducing','B','Nout'])
def Kdiag(self,index,target): def Kdiag(self,index,target):
@ -91,14 +91,14 @@ class Coregionalise(kernpart):
index2 = np.asarray(index2,dtype=np.int) index2 = np.asarray(index2,dtype=np.int)
code=""" code="""
for(int i=0; i<M; i++){ for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){ for(int j=0; j<N; j++){
dL_dK_small[index[j] + Nout*index2[i]] += dL_dK[i+j*M]; dL_dK_small[index[j] + Nout*index2[i]] += dL_dK[i+j*num_inducing];
} }
} }
""" """
N, M, Nout = index.size, index2.size, self.Nout N, num_inducing, Nout = index.size, index2.size, self.Nout
weave.inline(code, ['N','M','Nout','dL_dK','dL_dK_small','index','index2']) weave.inline(code, ['N','num_inducing','Nout','dL_dK','dL_dK_small','index','index2'])
dkappa = np.diag(dL_dK_small) dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T dL_dK_small += dL_dK_small.T

View file

@ -2,21 +2,20 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib
from scipy import integrate from scipy import integrate
class exponential(kernpart): class exponential(Kernpart):
""" """
Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2) Exponential kernel (aka Ornstein-Uhlenbeck or Matern 1/2)
.. math:: .. math::
k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } k(r) = \sigma^2 \exp(- r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
:param D: the number of input dimensions :param input_dim: the number of input dimensions
:type D: int :type input_dim: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
:type variance: float :type variance: float
:param lengthscale: the vector of lengthscale :math:`\ell_i` :param lengthscale: the vector of lengthscale :math:`\ell_i`
@ -26,11 +25,11 @@ class exponential(kernpart):
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,D,variance=1.,lengthscale=None,ARD=False): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
self.D = D self.input_dim = input_dim
self.ARD = ARD self.ARD = ARD
if ARD == False: if ARD == False:
self.Nparam = 2 self.num_params = 2
self.name = 'exp' self.name = 'exp'
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
@ -38,13 +37,13 @@ class exponential(kernpart):
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
self.Nparam = self.D + 1 self.num_params = self.input_dim + 1
self.name = 'exp' self.name = 'exp'
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.D, "bad number of lengthscales" assert lengthscale.size == self.input_dim, "bad number of lengthscales"
else: else:
lengthscale = np.ones(self.D) lengthscale = np.ones(self.input_dim)
self._set_params(np.hstack((variance, lengthscale.flatten()))) self._set_params(np.hstack((variance, lengthscale.flatten())))
def _get_params(self): def _get_params(self):
@ -53,13 +52,13 @@ class exponential(kernpart):
def _set_params(self, x): def _set_params(self, x):
"""set the value of the parameters.""" """set the value of the parameters."""
assert x.size == self.Nparam assert x.size == self.num_params
self.variance = x[0] self.variance = x[0]
self.lengthscale = x[1:] self.lengthscale = x[1:]
def _get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
if self.Nparam == 2: if self.num_params == 2:
return ['variance', 'lengthscale'] return ['variance', 'lengthscale']
else: else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
@ -107,7 +106,7 @@ class exponential(kernpart):
def Gram_matrix(self, F, F1, lower, upper): def Gram_matrix(self, F, F1, lower, upper):
""" """
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1. Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions :param F: vector of functions
:type F: np.array :type F: np.array
@ -116,7 +115,7 @@ class exponential(kernpart):
:param lower,upper: boundaries of the input domain :param lower,upper: boundaries of the input domain
:type lower,upper: floats :type lower,upper: floats
""" """
assert self.D == 1 assert self.input_dim == 1
def L(x, i): def L(x, i):
return(1. / self.lengthscale * F[i](x) + F1[i](x)) return(1. / self.lengthscale * F[i](x) + F1[i](x))
n = F.shape[0] n = F.shape[0]

View file

@ -2,21 +2,21 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
from ..util.linalg import pdinv,mdot from ..util.linalg import pdinv,mdot
class finite_dimensional(kernpart): class finite_dimensional(Kernpart):
def __init__(self, D, F, G, variance=1., weights=None): def __init__(self, input_dim, F, G, variance=1., weights=None):
""" """
Argumnents Argumnents
---------- ----------
D: int - the number of input dimensions input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions F: np.array of functions with shape (n,) - the n basis functions
G: np.array with shape (n,n) - the Gram matrix associated to F G: np.array with shape (n,n) - the Gram matrix associated to F
weights : np.ndarray with shape (n,) weights : np.ndarray with shape (n,)
""" """
self.D = D self.input_dim = input_dim
self.F = F self.F = F
self.G = G self.G = G
self.G_1 ,L,Li,logdet = pdinv(G) self.G_1 ,L,Li,logdet = pdinv(G)
@ -25,14 +25,14 @@ class finite_dimensional(kernpart):
assert weights.shape==(self.n,) assert weights.shape==(self.n,)
else: else:
weights = np.ones(self.n) weights = np.ones(self.n)
self.Nparam = self.n + 1 self.num_params = self.n + 1
self.name = 'finite_dim' self.name = 'finite_dim'
self._set_params(np.hstack((variance,weights))) self._set_params(np.hstack((variance,weights)))
def _get_params(self): def _get_params(self):
return np.hstack((self.variance,self.weights)) return np.hstack((self.variance,self.weights))
def _set_params(self,x): def _set_params(self,x):
assert x.size == (self.Nparam) assert x.size == (self.num_params)
self.variance = x[0] self.variance = x[0]
self.weights = x[1:] self.weights = x[1:]
def _get_param_names(self): def _get_param_names(self):
@ -48,7 +48,7 @@ class finite_dimensional(kernpart):
product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T) product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
np.add(product,target,target) np.add(product,target,target)
def Kdiag(self,X,target): def Kdiag(self,X,target):
product = np.diag(self.K(X,X2)) product = np.diag(self.K(X, X))
np.add(target,product,target) np.add(target,product,target)
def dK_dtheta(self,X,X2,target): def dK_dtheta(self,X,X2,target):
"""Return shape is NxMx(Ntheta)""" """Return shape is NxMx(Ntheta)"""

View file

@ -1,22 +1,21 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib
class fixed(kernpart): class Fixed(Kernpart):
def __init__(self,D,K,variance=1.): def __init__(self, input_dim, K, variance=1.):
""" """
:param D: the number of input dimensions :param input_dim: the number of input dimensions
:type D: int :type input_dim: int
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
self.D = D self.input_dim = input_dim
self.fixed_K = K self.fixed_K = K
self.Nparam = 1 self.num_params = 1
self.name = 'fixed' self.name = 'Fixed'
self._set_params(np.array([variance]).flatten()) self._set_params(np.array([variance]).flatten())
def _get_params(self): def _get_params(self):

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
def index_to_slices(index): def index_to_slices(index):
@ -31,7 +31,7 @@ def index_to_slices(index):
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))] [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret return ret
class independent_outputs(kernpart): class IndependentOutputs(Kernpart):
""" """
A kernel part shich can reopresent several independent functions. A kernel part shich can reopresent several independent functions.
this kernel 'switches off' parts of the matrix where the output indexes are different. this kernel 'switches off' parts of the matrix where the output indexes are different.
@ -41,8 +41,8 @@ class independent_outputs(kernpart):
""" """
def __init__(self,k): def __init__(self,k):
self.D = k.D + 1 self.input_dim = k.input_dim + 1
self.Nparam = k.Nparam self.num_params = k.num_params
self.name = 'iops('+ k.name + ')' self.name = 'iops('+ k.name + ')'
self.k = k self.k = k

View file

@ -4,30 +4,29 @@
import numpy as np import numpy as np
import pylab as pb import pylab as pb
from ..core.parameterised import parameterised from ..core.parameterised import Parameterised
from kernpart import kernpart from kernpart import Kernpart
import itertools import itertools
from prod import prod from prod import prod
from ..util.linalg import symmetrify
class kern(parameterised): class kern(Parameterised):
def __init__(self, input_dim, parts=[], input_slices=None): def __init__(self, input_dim, parts=[], input_slices=None):
""" """
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where. This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used. The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used.
:param input_dim: The dimensioality of the kernel's input space :param input_dim: The dimensionality of the kernel's input space
:type input_dim: int :type input_dim: int
:param parts: the 'parts' (PD functions) of the kernel :param parts: the 'parts' (PD functions) of the kernel
:type parts: list of kernpart objects :type parts: list of Kernpart objects
:param input_slices: the slices on the inputs which apply to each kernel :param input_slices: the slices on the inputs which apply to each kernel
:type input_slices: list of slice objects, or list of bools :type input_slices: list of slice objects, or list of bools
""" """
self.parts = parts self.parts = parts
self.Nparts = len(parts) self.Nparts = len(parts)
self.Nparam = sum([p.Nparam for p in self.parts]) self.num_params = sum([p.num_params for p in self.parts])
self.input_dim = input_dim self.input_dim = input_dim
@ -39,11 +38,11 @@ class kern(parameterised):
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices] self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
for p in self.parts: for p in self.parts:
assert isinstance(p, kernpart), "bad kernel part" assert isinstance(p, Kernpart), "bad kernel part"
self.compute_param_slices() self.compute_param_slices()
parameterised.__init__(self) Parameterised.__init__(self)
def plot_ARD(self, fignum=None, ax=None): def plot_ARD(self, fignum=None, ax=None):
@ -80,8 +79,8 @@ class kern(parameterised):
self.param_slices = [] self.param_slices = []
count = 0 count = 0
for p in self.parts: for p in self.parts:
self.param_slices.append(slice(count, count + p.Nparam)) self.param_slices.append(slice(count, count + p.num_params))
count += p.Nparam count += p.num_params
def __add__(self, other): def __add__(self, other):
""" """
@ -104,21 +103,21 @@ class kern(parameterised):
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
# transfer constraints: # transfer constraints:
newkern.constrained_indices = self.constrained_indices + [x + self.Nparam for x in other.constrained_indices] newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.constraints = self.constraints + other.constraints newkern.constraints = self.constraints + other.constraints
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
else: else:
assert self.input_dim == other.input_dim assert self.input_dim == other.input_dim
newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices) newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints: # transfer constraints:
newkern.constrained_indices = self.constrained_indices + [i + self.Nparam for i in other.constrained_indices] newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
newkern.constraints = self.constraints + other.constraints newkern.constraints = self.constraints + other.constraints
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices] newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
newkern.fixed_values = self.fixed_values + other.fixed_values newkern.fixed_values = self.fixed_values + other.fixed_values
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
return newkern return newkern
def __mul__(self, other): def __mul__(self, other):
@ -158,13 +157,13 @@ class kern(parameterised):
K1_param = [] K1_param = []
n = 0 n = 0
for k1 in K1.parts: for k1 in K1.parts:
K1_param += [range(n, n + k1.Nparam)] K1_param += [range(n, n + k1.num_params)]
n += k1.Nparam n += k1.num_params
n = 0 n = 0
K2_param = [] K2_param = []
for k2 in K2.parts: for k2 in K2.parts:
K2_param += [range(K1.Nparam + n, K1.Nparam + n + k2.Nparam)] K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
n += k2.Nparam n += k2.num_params
index_param = [] index_param = []
for p1 in K1_param: for p1 in K1_param:
for p2 in K2_param: for p2 in K2_param:
@ -172,12 +171,12 @@ class kern(parameterised):
index_param = np.array(index_param) index_param = np.array(index_param)
# Get the ties and constrains of the kernels before the multiplication # Get the ties and constrains of the kernels before the multiplication
prev_ties = K1.tied_indices + [arr + K1.Nparam for arr in K2.tied_indices] prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices]
prev_constr_ind = [K1.constrained_indices] + [K1.Nparam + i for i in K2.constrained_indices] prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices]
prev_constr = K1.constraints + K2.constraints prev_constr = K1.constraints + K2.constraints
# prev_constr_fix = K1.fixed_indices + [arr + K1.Nparam for arr in K2.fixed_indices] # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices]
# prev_constr_fix_values = K1.fixed_values + K2.fixed_values # prev_constr_fix_values = K1.fixed_values + K2.fixed_values
# follow the previous ties # follow the previous ties
@ -186,7 +185,7 @@ class kern(parameterised):
index_param[np.where(index_param == j)[0]] = arr[0] index_param[np.where(index_param == j)[0]] = arr[0]
# ties and constrains # ties and constrains
for i in range(K1.Nparam + K2.Nparam): for i in range(K1.num_params + K2.num_params):
index = np.where(index_param == i)[0] index = np.where(index_param == i)[0]
if index.size > 1: if index.size > 1:
self.tie_params(index) self.tie_params(index)
@ -223,14 +222,14 @@ class kern(parameterised):
def dK_dtheta(self, dL_dK, X, X2=None): def dK_dtheta(self, dL_dK, X, X2=None):
""" """
:param dL_dK: An array of dL_dK derivaties, dL_dK :param dL_dK: An array of dL_dK derivaties, dL_dK
:type dL_dK: Np.ndarray (N x M) :type dL_dK: Np.ndarray (N x num_inducing)
:param X: Observed data inputs :param X: Observed data inputs
:type X: np.ndarray (N x input_dim) :type X: np.ndarray (N x input_dim)
:param X2: Observed dara inputs (optional, defaults to X) :param X2: Observed dara inputs (optional, defaults to X)
:type X2: np.ndarray (M x input_dim) :type X2: np.ndarray (num_inducing x input_dim)
""" """
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
target = np.zeros(self.Nparam) target = np.zeros(self.num_params)
if X2 is None: if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
else: else:
@ -259,7 +258,7 @@ class kern(parameterised):
def dKdiag_dtheta(self, dL_dKdiag, X): def dKdiag_dtheta(self, dL_dKdiag, X):
assert X.shape[1] == self.input_dim assert X.shape[1] == self.input_dim
assert dL_dKdiag.size == X.shape[0] assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.Nparam) target = np.zeros(self.num_params)
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target) return self._transform_gradients(target)
@ -275,7 +274,7 @@ class kern(parameterised):
return target return target
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.Nparam) target = np.zeros(self.num_params)
[p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target) return self._transform_gradients(target)
@ -290,7 +289,7 @@ class kern(parameterised):
return target return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.Nparam)) target = np.zeros((self.num_params))
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target) return self._transform_gradients(target)
@ -300,16 +299,16 @@ class kern(parameterised):
return target return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are N,M,input_dim""" """return shapes are N,num_inducing,input_dim"""
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S return target_mu, target_S
def psi2(self, Z, mu, S): def psi2(self, Z, mu, S):
""" """
:param Z: np.ndarray of inducing inputs (M x input_dim) :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
:param mu, S: np.ndarrays of means and variances (each N x input_dim) :param mu, S: np.ndarrays of means and variances (each N x input_dim)
:returns psi2: np.ndarray (N,M,M) :returns psi2: np.ndarray (N,num_inducing,num_inducing)
""" """
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
@ -333,7 +332,7 @@ class kern(parameterised):
return target return target
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
target = np.zeros(self.Nparam) target = np.zeros(self.num_params)
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
# compute the "cross" terms # compute the "cross" terms
@ -430,7 +429,7 @@ class kern(parameterised):
Xnew = np.vstack((xx.flatten(), yy.flatten())).T Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = self.K(Xnew, x, which_parts) Kx = self.K(Xnew, x, which_parts)
Kx = Kx.reshape(resolution, resolution).T Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) # @UndefinedVariable
pb.xlim(xmin[0], xmax[0]) pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1], xmax[1]) pb.ylim(xmin[1], xmax[1])
pb.xlabel("x1") pb.xlabel("x1")

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
class kernpart(object): class Kernpart(object):
def __init__(self,input_dim): def __init__(self,input_dim):
""" """
The base class for a kernpart: a positive definite function which forms part of a kernel The base class for a kernpart: a positive definite function which forms part of a kernel
@ -13,7 +13,7 @@ class kernpart(object):
Do not instantiate. Do not instantiate.
""" """
self.input_dim = input_dim self.input_dim = input_dim
self.Nparam = 1 self.num_params = 1
self.name = 'unnamed' self.name = 'unnamed'
def _get_params(self): def _get_params(self):

View file

@ -2,12 +2,12 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
from ..util.linalg import tdot from ..util.linalg import tdot
from scipy import weave from scipy import weave
class linear(kernpart): class linear(Kernpart):
""" """
Linear kernel Linear kernel
@ -28,7 +28,7 @@ class linear(kernpart):
self.input_dim = input_dim self.input_dim = input_dim
self.ARD = ARD self.ARD = ARD
if ARD == False: if ARD == False:
self.Nparam = 1 self.num_params = 1
self.name = 'linear' self.name = 'linear'
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
@ -37,7 +37,7 @@ class linear(kernpart):
variances = np.ones(1) variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,)) self._Xcache, self._X2cache = np.empty(shape=(2,))
else: else:
self.Nparam = self.input_dim self.num_params = self.input_dim
self.name = 'linear' self.name = 'linear'
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
@ -54,12 +54,12 @@ class linear(kernpart):
return self.variances return self.variances
def _set_params(self, x): def _set_params(self, x):
assert x.size == (self.Nparam) assert x.size == (self.num_params)
self.variances = x self.variances = x
self.variances2 = np.square(self.variances) self.variances2 = np.square(self.variances)
def _get_param_names(self): def _get_param_names(self):
if self.Nparam == 1: if self.num_params == 1:
return ['variance'] return ['variance']
else: else:
return ['variance_%i' % i for i in range(self.variances.size)] return ['variance_%i' % i for i in range(self.variances.size)]
@ -138,7 +138,7 @@ class linear(kernpart):
def psi2(self, Z, mu, S, target): def psi2(self, Z, mu, S, target):
""" """
returns N,M,M matrix returns N,num_inducing,num_inducing matrix
""" """
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
# psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :] # psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :]
@ -168,7 +168,7 @@ class linear(kernpart):
target += tmp.sum() target += tmp.sum()
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,M,M,input_dim """ """Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2) AZZA = AZZA + AZZA.swapaxes(1, 2)
@ -184,7 +184,7 @@ class linear(kernpart):
double factor,tmp; double factor,tmp;
#pragma omp parallel for private(m,mm,q,qq,factor,tmp) #pragma omp parallel for private(m,mm,q,qq,factor,tmp)
for(n=0;n<N;n++){ for(n=0;n<N;n++){
for(m=0;m<M;m++){ for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){ for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once) //add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm) if(m==mm)
@ -215,9 +215,9 @@ class linear(kernpart):
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']} 'extra_link_args' : ['-lgomp']}
N,M,input_dim = mu.shape[0],Z.shape[0],mu.shape[1] N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','M','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
@ -231,9 +231,9 @@ class linear(kernpart):
code=""" code="""
int n,m,mm,q; int n,m,mm,q;
#pragma omp parallel for private(n,mm,q) #pragma omp parallel for private(n,mm,q)
for(m=0;m<M;m++){ for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){ for(q=0;q<input_dim;q++){
for(mm=0;mm<M;mm++){ for(mm=0;mm<num_inducing;mm++){
for(n=0;n<N;n++){ for(n=0;n<N;n++){
target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q); target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
} }
@ -249,9 +249,9 @@ class linear(kernpart):
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']} 'extra_link_args' : ['-lgomp']}
N,M,input_dim = mu.shape[0],Z.shape[0],mu.shape[1] N,num_inducing,input_dim = mu.shape[0],Z.shape[0],mu.shape[1]
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','M','input_dim','AZA','target','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
@ -278,7 +278,7 @@ class linear(kernpart):
muS_changed = not (np.array_equal(mu, self._mu) and np.array_equal(S, self._S)) muS_changed = not (np.array_equal(mu, self._mu) and np.array_equal(S, self._S))
if Zv_changed: if Zv_changed:
# Z has changed, compute Z specific stuff # Z has changed, compute Z specific stuff
# self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,input_dim # self.ZZ = Z[:,None,:]*Z[None,:,:] # num_inducing,num_inducing,input_dim
# self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F') # self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
# [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])] # [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
self.ZA = Z * self.variances self.ZA = Z * self.variances
@ -291,5 +291,5 @@ class linear(kernpart):
self.inner[:, diag_indices[0], diag_indices[1]] += S self.inner[:, diag_indices[0], diag_indices[1]] += S
self._mu, self._S = mu.copy(), S.copy() self._mu, self._S = mu.copy(), S.copy()
if Zv_changed or muS_changed: if Zv_changed or muS_changed:
self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [M x N x input_dim]! self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
self._psi2 = np.dot(self.ZAinner, self.ZA.T) self._psi2 = np.dot(self.ZAinner, self.ZA.T)

View file

@ -2,12 +2,12 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
from GPy.util.linalg import mdot, pdinv from GPy.util.linalg import mdot
from GPy.util.decorators import silence_errors from GPy.util.decorators import silence_errors
class periodic_Matern32(kernpart): class periodic_Matern32(Kernpart):
""" """
Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1. Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
@ -35,7 +35,7 @@ class periodic_Matern32(kernpart):
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
self.lower,self.upper = lower, upper self.lower,self.upper = lower, upper
self.Nparam = 3 self.num_params = 3
self.n_freq = n_freq self.n_freq = n_freq
self.n_basis = 2*n_freq self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period))) self._set_params(np.hstack((variance,lengthscale,period)))
@ -113,7 +113,7 @@ class periodic_Matern32(kernpart):
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

View file

@ -2,12 +2,12 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
from GPy.util.linalg import mdot, pdinv from GPy.util.linalg import mdot
from GPy.util.decorators import silence_errors from GPy.util.decorators import silence_errors
class periodic_Matern52(kernpart): class periodic_Matern52(Kernpart):
""" """
Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1. Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1.
@ -35,7 +35,7 @@ class periodic_Matern52(kernpart):
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
self.lower,self.upper = lower, upper self.lower,self.upper = lower, upper
self.Nparam = 3 self.num_params = 3
self.n_freq = n_freq self.n_freq = n_freq
self.n_basis = 2*n_freq self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period))) self._set_params(np.hstack((variance,lengthscale,period)))
@ -115,7 +115,7 @@ class periodic_Matern52(kernpart):
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
@ -209,7 +209,7 @@ class periodic_Matern52(kernpart):
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None] F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
#dK_dvar #dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T) dK_dvar = 1. / self.variance * mdot(FX, self.Gi, FX.T)
#dK_dlen #dK_dlen
da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.] da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]

View file

@ -2,12 +2,12 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
from GPy.util.linalg import mdot, pdinv from GPy.util.linalg import mdot
from GPy.util.decorators import silence_errors from GPy.util.decorators import silence_errors
class periodic_exponential(kernpart): class periodic_exponential(Kernpart):
""" """
Kernel of the periodic subspace (up to a given frequency) of a exponential (Matern 1/2) RKHS. Only defined for input_dim=1. Kernel of the periodic subspace (up to a given frequency) of a exponential (Matern 1/2) RKHS. Only defined for input_dim=1.
@ -35,7 +35,7 @@ class periodic_exponential(kernpart):
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
self.lower,self.upper = lower, upper self.lower,self.upper = lower, upper
self.Nparam = 3 self.num_params = 3
self.n_freq = n_freq self.n_freq = n_freq
self.n_basis = 2*n_freq self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period))) self._set_params(np.hstack((variance,lengthscale,period)))
@ -111,7 +111,7 @@ class periodic_exponential(kernpart):
@silence_errors @silence_errors
def dK_dtheta(self,dL_dK,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" """derivative of the covariance matrix with respect to the parameters (shape is Nxnum_inducingxNparam)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2) FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)

View file

@ -1,23 +1,23 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib import hashlib
class prod(kernpart): class prod(Kernpart):
""" """
Computes the product of 2 kernels Computes the product of 2 kernels
:param k1, k2: the kernels to multiply :param k1, k2: the kernels to multiply
:type k1, k2: kernpart :type k1, k2: Kernpart
:param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces :param tensor: The kernels are either multiply as functions defined on the same input space (default) or on the product of the input spaces
:type tensor: Boolean :type tensor: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,k1,k2,tensor=False): def __init__(self,k1,k2,tensor=False):
self.Nparam = k1.Nparam + k2.Nparam self.num_params = k1.num_params + k2.num_params
self.name = k1.name + '<times>' + k2.name self.name = k1.name + '<times>' + k2.name
self.k1 = k1 self.k1 = k1
self.k2 = k2 self.k2 = k2
@ -40,8 +40,8 @@ class prod(kernpart):
def _set_params(self,x): def _set_params(self,x):
"""set the value of the parameters.""" """set the value of the parameters."""
self.k1._set_params(x[:self.k1.Nparam]) self.k1._set_params(x[:self.k1.num_params])
self.k2._set_params(x[self.k1.Nparam:]) self.k2._set_params(x[self.k1.num_params:])
def _get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
@ -55,11 +55,11 @@ class prod(kernpart):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None: if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam]) self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:]) self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
else: else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam]) self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:]) self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
@ -74,8 +74,8 @@ class prod(kernpart):
K2 = np.zeros(X.shape[0]) K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,self.slice1],K1) self.k1.Kdiag(X[:,self.slice1],K1)
self.k2.Kdiag(X[:,self.slice2],K2) self.k2.Kdiag(X[:,self.slice2],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam]) self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:]) self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""

View file

@ -1,23 +1,23 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib import hashlib
#from scipy import integrate # This may not be necessary (Nicolas, 20th Feb) #from scipy import integrate # This may not be necessary (Nicolas, 20th Feb)
class prod_orthogonal(kernpart): class prod_orthogonal(Kernpart):
""" """
Computes the product of 2 kernels Computes the product of 2 kernels
:param k1, k2: the kernels to multiply :param k1, k2: the kernels to multiply
:type k1, k2: kernpart :type k1, k2: Kernpart
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,k1,k2): def __init__(self,k1,k2):
self.input_dim = k1.input_dim + k2.input_dim self.input_dim = k1.input_dim + k2.input_dim
self.Nparam = k1.Nparam + k2.Nparam self.num_params = k1.num_params + k2.num_params
self.name = k1.name + '<times>' + k2.name self.name = k1.name + '<times>' + k2.name
self.k1 = k1 self.k1 = k1
self.k2 = k2 self.k2 = k2
@ -30,8 +30,8 @@ class prod_orthogonal(kernpart):
def _set_params(self,x): def _set_params(self,x):
"""set the value of the parameters.""" """set the value of the parameters."""
self.k1._set_params(x[:self.k1.Nparam]) self.k1._set_params(x[:self.k1.num_params])
self.k2._set_params(x[self.k1.Nparam:]) self.k2._set_params(x[self.k1.num_params:])
def _get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
@ -45,11 +45,11 @@ class prod_orthogonal(kernpart):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
self._K_computations(X,X2) self._K_computations(X,X2)
if X2 is None: if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.Nparam]) self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.Nparam:]) self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:])
else: else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.Nparam]) self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.Nparam:]) self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:])
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
@ -64,8 +64,8 @@ class prod_orthogonal(kernpart):
K2 = np.zeros(X.shape[0]) K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,:self.k1.input_dim],K1) self.k1.Kdiag(X[:,:self.k1.input_dim],K1)
self.k2.Kdiag(X[:,self.k1.input_dim:],K2) self.k2.Kdiag(X[:,self.k1.input_dim:],K2)
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.Nparam]) self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params])
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.Nparam:]) self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:])
def dK_dX(self,dL_dK,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""

View file

@ -2,10 +2,10 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
class rational_quadratic(kernpart): class rational_quadratic(Kernpart):
""" """
rational quadratic kernel rational quadratic kernel
@ -21,13 +21,13 @@ class rational_quadratic(kernpart):
:type lengthscale: float :type lengthscale: float
:param power: the power :math:`\\alpha` :param power: the power :math:`\\alpha`
:type power: float :type power: float
:rtype: kernpart object :rtype: Kernpart object
""" """
def __init__(self,input_dim,variance=1.,lengthscale=1.,power=1.): def __init__(self,input_dim,variance=1.,lengthscale=1.,power=1.):
assert input_dim == 1, "For this kernel we assume input_dim=1" assert input_dim == 1, "For this kernel we assume input_dim=1"
self.input_dim = input_dim self.input_dim = input_dim
self.Nparam = 3 self.num_params = 3
self.name = 'rat_quad' self.name = 'rat_quad'
self.variance = variance self.variance = variance
self.lengthscale = lengthscale self.lengthscale = lengthscale

View file

@ -2,13 +2,13 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib import hashlib
from scipy import weave from scipy import weave
from ..util.linalg import tdot from ..util.linalg import tdot
class rbf(kernpart): class rbf(Kernpart):
""" """
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
@ -36,14 +36,14 @@ class rbf(kernpart):
self.name = 'rbf' self.name = 'rbf'
self.ARD = ARD self.ARD = ARD
if not ARD: if not ARD:
self.Nparam = 2 self.num_params = 2
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else: else:
lengthscale = np.ones(1) lengthscale = np.ones(1)
else: else:
self.Nparam = self.input_dim + 1 self.num_params = self.input_dim + 1
if lengthscale is not None: if lengthscale is not None:
lengthscale = np.asarray(lengthscale) lengthscale = np.asarray(lengthscale)
assert lengthscale.size == self.input_dim, "bad number of lengthscales" assert lengthscale.size == self.input_dim, "bad number of lengthscales"
@ -67,7 +67,7 @@ class rbf(kernpart):
return np.hstack((self.variance, self.lengthscale)) return np.hstack((self.variance, self.lengthscale))
def _set_params(self, x): def _set_params(self, x):
assert x.size == (self.Nparam) assert x.size == (self.num_params)
self.variance = x[0] self.variance = x[0]
self.lengthscale = x[1:] self.lengthscale = x[1:]
self.lengthscale2 = np.square(self.lengthscale) self.lengthscale2 = np.square(self.lengthscale)
@ -76,7 +76,7 @@ class rbf(kernpart):
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def _get_param_names(self): def _get_param_names(self):
if self.Nparam == 2: if self.num_params == 2:
return ['variance', 'lengthscale'] return ['variance', 'lengthscale']
else: else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
@ -110,7 +110,7 @@ class rbf(kernpart):
target(q+1) += var_len3(q)*tmp; target(q+1) += var_len3(q)*tmp;
} }
""" """
N, M, input_dim = X.shape[0], X.shape[0], self.input_dim N, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
else: else:
code = """ code = """
int q,i,j; int q,i,j;
@ -118,16 +118,16 @@ class rbf(kernpart):
for(q=0; q<input_dim; q++){ for(q=0; q<input_dim; q++){
tmp = 0; tmp = 0;
for(i=0; i<N; i++){ for(i=0; i<N; i++){
for(j=0; j<M; j++){ for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j); tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
} }
} }
target(q+1) += var_len3(q)*tmp; target(q+1) += var_len3(q)*tmp;
} }
""" """
N, M, input_dim = X.shape[0], X2.shape[0], self.input_dim N, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)] # [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['N', 'M', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3'], weave.inline(code, arg_names=['N','num_inducing','input_dim','X','X2','target','dvardLdK','var_len3'],
type_converters=weave.converters.blitz, **self.weave_options) type_converters=weave.converters.blitz, **self.weave_options)
else: else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
@ -191,7 +191,7 @@ class rbf(kernpart):
target += self._psi2 target += self._psi2
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target): def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
"""Shape N,M,M,Ntheta""" """Shape N,num_inducing,num_inducing,Ntheta"""
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
d_var = 2.*self._psi2 / self.variance d_var = 2.*self._psi2 / self.variance
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom) d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
@ -205,19 +205,18 @@ class rbf(kernpart):
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target): def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # M, M, input_dim term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, M, M, input_dim term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2) dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,M,M,input_dim """ """Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S) self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------# #---------------------------------------#
# Precomputations # # Precomputations #
#---------------------------------------# #---------------------------------------#
@ -242,9 +241,9 @@ class rbf(kernpart):
# here are the "statistics" for psi1 and psi2 # here are the "statistics" for psi1 and psi2
if not np.array_equal(Z, self._Z): if not np.array_equal(Z, self._Z):
#Z has changed, compute Z specific stuff #Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,input_dim self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # num_inducing,num_inducing,input_dim
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,input_dim self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # num_inducing,num_inducing,input_dim
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,input_dim self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # num_inducing,num_inducing,input_dim
self._Z = Z self._Z = Z
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)): if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
@ -258,24 +257,24 @@ class rbf(kernpart):
self._psi1 = self.variance*np.exp(self._psi1_exponent) self._psi1 = self.variance*np.exp(self._psi1_exponent)
#psi2 #psi2
self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,input_dim self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,num_inducing,num_inducing,input_dim
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat) self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu,self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,input_dim #self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,num_inducing,num_inducing,input_dim
#self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom) #self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M #self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,num_inducing,num_inducing
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,num_inducing,num_inducing
#store matrices for caching #store matrices for caching
self._Z, self._mu, self._S = Z, mu,S self._Z, self._mu, self._S = Z, mu,S
def weave_psi2(self,mu,Zhat): def weave_psi2(self,mu,Zhat):
N,input_dim = mu.shape N,input_dim = mu.shape
M = Zhat.shape[0] num_inducing = Zhat.shape[0]
mudist = np.empty((N, M, M, input_dim)) mudist = np.empty((N,num_inducing,num_inducing,input_dim))
mudist_sq = np.empty((N, M, M, input_dim)) mudist_sq = np.empty((N,num_inducing,num_inducing,input_dim))
psi2_exponent = np.zeros((N, M, M)) psi2_exponent = np.zeros((N,num_inducing,num_inducing))
psi2 = np.empty((N, M, M)) psi2 = np.empty((N,num_inducing,num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim) _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
@ -290,7 +289,7 @@ class rbf(kernpart):
#pragma omp parallel for private(tmp) #pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){ for (int n=0; n<N; n++){
for (int m=0; m<M; m++){ for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){ for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){ for (int q=0; q<input_dim; q++){
//compute mudist //compute mudist
@ -325,7 +324,7 @@ class rbf(kernpart):
#include <math.h> #include <math.h>
""" """
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'M', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], arg_names=['N','num_inducing','input_dim','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options) type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2 return mudist, mudist_sq, psi2_exponent, psi2

View file

@ -3,10 +3,10 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
class rbfcos(kernpart): class rbfcos(Kernpart):
def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False): def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False):
self.input_dim = input_dim self.input_dim = input_dim
self.name = 'rbfcos' self.name = 'rbfcos'
@ -14,9 +14,9 @@ class rbfcos(kernpart):
print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs" print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs"
self.ARD = ARD self.ARD = ARD
#set the default frequencies and bandwidths, appropriate Nparam #set the default frequencies and bandwidths, appropriate num_params
if ARD: if ARD:
self.Nparam = 2*self.input_dim + 1 self.num_params = 2*self.input_dim + 1
if frequencies is not None: if frequencies is not None:
frequencies = np.asarray(frequencies) frequencies = np.asarray(frequencies)
assert frequencies.size == self.input_dim, "bad number of frequencies" assert frequencies.size == self.input_dim, "bad number of frequencies"
@ -28,7 +28,7 @@ class rbfcos(kernpart):
else: else:
bandwidths = np.ones(self.input_dim) bandwidths = np.ones(self.input_dim)
else: else:
self.Nparam = 3 self.num_params = 3
if frequencies is not None: if frequencies is not None:
frequencies = np.asarray(frequencies) frequencies = np.asarray(frequencies)
assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel" assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel"
@ -51,7 +51,7 @@ class rbfcos(kernpart):
return np.hstack((self.variance,self.frequencies, self.bandwidths)) return np.hstack((self.variance,self.frequencies, self.bandwidths))
def _set_params(self,x): def _set_params(self,x):
assert x.size==(self.Nparam) assert x.size==(self.num_params)
if self.ARD: if self.ARD:
self.variance = x[0] self.variance = x[0]
self.frequencies = x[1:1+self.input_dim] self.frequencies = x[1:1+self.input_dim]
@ -60,7 +60,7 @@ class rbfcos(kernpart):
self.variance, self.frequencies, self.bandwidths = x self.variance, self.frequencies, self.bandwidths = x
def _get_param_names(self): def _get_param_names(self):
if self.Nparam == 3: if self.num_params == 3:
return ['variance','frequency','bandwidth'] return ['variance','frequency','bandwidth']
else: else:
return ['variance']+['frequency_%i'%i for i in range(self.input_dim)]+['bandwidth_%i'%i for i in range(self.input_dim)] return ['variance']+['frequency_%i'%i for i in range(self.input_dim)]+['bandwidth_%i'%i for i in range(self.input_dim)]
@ -106,7 +106,7 @@ class rbfcos(kernpart):
self._dist2 = np.square(self._dist) self._dist2 = np.square(self._dist)
#ensure the next section is computed: #ensure the next section is computed:
self._params = np.empty(self.Nparam) self._params = np.empty(self.num_params)
if not np.all(self._params == self._get_params()): if not np.all(self._params == self._get_params()):
self._params == self._get_params().copy() self._params == self._get_params().copy()

View file

@ -2,14 +2,14 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
import hashlib import hashlib
def theta(x): def theta(x):
"""Heaviside step function""" """Heaviside step function"""
return np.where(x>=0.,1.,0.) return np.where(x>=0.,1.,0.)
class spline(kernpart): class spline(Kernpart):
""" """
Spline kernel Spline kernel
@ -23,7 +23,7 @@ class spline(kernpart):
def __init__(self,input_dim,variance=1.,lengthscale=1.): def __init__(self,input_dim,variance=1.,lengthscale=1.):
self.input_dim = input_dim self.input_dim = input_dim
assert self.input_dim==1 assert self.input_dim==1
self.Nparam = 1 self.num_params = 1
self.name = 'spline' self.name = 'spline'
self._set_params(np.squeeze(variance)) self._set_params(np.squeeze(variance))

View file

@ -1,18 +1,18 @@
# Copyright (c) 2012 James Hensman # Copyright (c) 2012 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
class symmetric(kernpart): class symmetric(Kernpart):
""" """
Symmetrical kernels Symmetrical kernels
:param k: the kernel to symmetrify :param k: the kernel to symmetrify
:type k: kernpart :type k: Kernpart
:param transform: the transform to use in symmetrification (allows symmetry on specified axes) :param transform: the transform to use in symmetrification (allows symmetry on specified axes)
:type transform: A numpy array (input_dim x input_dim) specifiying the transform :type transform: A numpy array (input_dim x input_dim) specifiying the transform
:rtype: kernpart :rtype: Kernpart
""" """
def __init__(self,k,transform=None): def __init__(self,k,transform=None):
@ -21,7 +21,7 @@ class symmetric(kernpart):
assert transform.shape == (k.input_dim, k.input_dim) assert transform.shape == (k.input_dim, k.input_dim)
self.transform = transform self.transform = transform
self.input_dim = k.input_dim self.input_dim = k.input_dim
self.Nparam = k.Nparam self.num_params = k.num_params
self.name = k.name + '_symm' self.name = k.name + '_symm'
self.k = k self.k = k
self._set_params(k._get_params()) self._set_params(k._get_params())

View file

@ -9,9 +9,9 @@ import sys
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import tempfile import tempfile
import pdb import pdb
from kernpart import kernpart from kernpart import Kernpart
class spkern(kernpart): class spkern(Kernpart):
""" """
A kernel object, where all the hard work in done by sympy. A kernel object, where all the hard work in done by sympy.
@ -38,12 +38,12 @@ class spkern(kernpart):
self.input_dim = len(self._sp_x) self.input_dim = len(self._sp_x)
assert self.input_dim == input_dim assert self.input_dim == input_dim
self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name) self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name)
self.Nparam = len(self._sp_theta) self.num_params = len(self._sp_theta)
#deal with param #deal with param
if param is None: if param is None:
param = np.ones(self.Nparam) param = np.ones(self.num_params)
assert param.size==self.Nparam assert param.size==self.num_params
self._set_params(param) self._set_params(param)
#Differentiate! #Differentiate!
@ -115,19 +115,19 @@ class spkern(kernpart):
#Here's some code to do the looping for K #Here's some code to do the looping for K
arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\ arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\
+ ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\ + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\
+ ["param[%i]"%i for i in range(self.Nparam)]) + ["param[%i]"%i for i in range(self.num_params)])
self._K_code =\ self._K_code =\
""" """
int i; int i;
int j; int j;
int N = target_array->dimensions[0]; int N = target_array->dimensions[0];
int M = target_array->dimensions[1]; int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1]; int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j) //#pragma omp parallel for private(j)
for (i=0;i<N;i++){ for (i=0;i<N;i++){
for (j=0;j<M;j++){ for (j=0;j<num_inducing;j++){
target[i*M+j] = k(%s); target[i*num_inducing+j] = k(%s);
} }
} }
%s %s
@ -149,17 +149,17 @@ class spkern(kernpart):
"""%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#here's some code to compute gradients #here's some code to compute gradients
funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*M+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)]) funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*num_inducing+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])
self._dK_dtheta_code =\ self._dK_dtheta_code =\
""" """
int i; int i;
int j; int j;
int N = partial_array->dimensions[0]; int N = partial_array->dimensions[0];
int M = partial_array->dimensions[1]; int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1]; int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j) //#pragma omp parallel for private(j)
for (i=0;i<N;i++){ for (i=0;i<N;i++){
for (j=0;j<M;j++){ for (j=0;j<num_inducing;j++){
%s %s
} }
} }
@ -169,7 +169,7 @@ class spkern(kernpart):
#here's some code to compute gradients for Kdiag TODO: thius is yucky. #here's some code to compute gradients for Kdiag TODO: thius is yucky.
diag_funclist = re.sub('Z','X',funclist,count=0) diag_funclist = re.sub('Z','X',funclist,count=0)
diag_funclist = re.sub('j','i',diag_funclist) diag_funclist = re.sub('j','i',diag_funclist)
diag_funclist = re.sub('partial\[i\*M\+i\]','partial[i]',diag_funclist) diag_funclist = re.sub('partial\[i\*num_inducing\+i\]','partial[i]',diag_funclist)
self._dKdiag_dtheta_code =\ self._dKdiag_dtheta_code =\
""" """
int i; int i;
@ -182,17 +182,17 @@ class spkern(kernpart):
"""%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed """%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
#Here's some code to do gradients wrt x #Here's some code to do gradients wrt x
gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*M+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)]) gradient_funcs = "\n".join(["target[i*input_dim+%i] += partial[i*num_inducing+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.input_dim)])
self._dK_dX_code = \ self._dK_dX_code = \
""" """
int i; int i;
int j; int j;
int N = partial_array->dimensions[0]; int N = partial_array->dimensions[0];
int M = partial_array->dimensions[1]; int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1]; int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j) //#pragma omp parallel for private(j)
for (i=0;i<N; i++){ for (i=0;i<N; i++){
for (j=0; j<M; j++){ for (j=0; j<num_inducing; j++){
%s %s
//if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));} //if(isnan(target[i*input_dim+2])){printf("%%f\\n",dk_dx2(X[i*input_dim+0], X[i*input_dim+1], X[i*input_dim+2], Z[j*input_dim+0], Z[j*input_dim+1], Z[j*input_dim+2], param[0], param[1], param[2], param[3], param[4], param[5]));}
//if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);} //if(isnan(target[i*input_dim+2])){printf("%%f,%%f,%%i,%%i\\n", X[i*input_dim+2], Z[j*input_dim+2],i,j);}
@ -208,7 +208,7 @@ class spkern(kernpart):
int i; int i;
int j; int j;
int N = partial_array->dimensions[0]; int N = partial_array->dimensions[0];
int M = 0; int num_inducing = 0;
int input_dim = X_array->dimensions[1]; int input_dim = X_array->dimensions[1];
for (i=0;i<N; i++){ for (i=0;i<N; i++){
j = i; j = i;

View file

@ -2,9 +2,9 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import Kernpart
import numpy as np import numpy as np
class white(kernpart): class white(Kernpart):
""" """
White noise kernel. White noise kernel.
@ -15,7 +15,7 @@ class white(kernpart):
""" """
def __init__(self,input_dim,variance=1.): def __init__(self,input_dim,variance=1.):
self.input_dim = input_dim self.input_dim = input_dim
self.Nparam = 1 self.num_params = 1
self.name = 'white' self.name = 'white'
self._set_params(np.array([variance]).flatten()) self._set_params(np.array([variance]).flatten())
self._psi1 = 0 # TODO: more elegance here self._psi1 = 0 # TODO: more elegance here

View file

@ -139,7 +139,7 @@ class EP(likelihood):
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013. For nomenclature see ... 2013.
""" """
M = Kmm.shape[0] num_inducing = Kmm.shape[0]
#TODO: this doesn't work with uncertain inputs! #TODO: this doesn't work with uncertain inputs!
@ -235,7 +235,7 @@ class EP(likelihood):
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008. For nomenclature see Naish-Guzman and Holden, 2008.
""" """
M = Kmm.shape[0] num_inducing = Kmm.shape[0]
""" """
Prior approximation parameters: Prior approximation parameters:
@ -258,7 +258,7 @@ class EP(likelihood):
mu = w + P*Gamma mu = w + P*Gamma
""" """
self.w = np.zeros(self.N) self.w = np.zeros(self.N)
self.Gamma = np.zeros(M) self.Gamma = np.zeros(num_inducing)
mu = np.zeros(self.N) mu = np.zeros(self.N)
P = P0.copy() P = P0.copy()
R = R0.copy() R = R0.copy()
@ -305,10 +305,10 @@ class EP(likelihood):
dtd1 = Delta_tau*Diag[i] + 1. dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i] dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,M) pi_ = P[i,:].reshape(1,num_inducing)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T) Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) RTR = np.dot(R.T,np.dot(np.eye(num_inducing) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T R = jitchol(RTR).T
self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1 self.w[i] += (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) self.Gamma += (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
@ -321,7 +321,7 @@ class EP(likelihood):
Diag = Diag0 * Iplus_Dprod_i Diag = Diag0 * Iplus_Dprod_i
P = Iplus_Dprod_i[:,None] * P0 P = Iplus_Dprod_i[:,None] * P0
safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0) safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0)
L = jitchol(np.eye(M) + np.dot(RPT0,safe_diag[:,None]*RPT0.T)) L = jitchol(np.eye(num_inducing) + np.dot(RPT0,safe_diag[:,None]*RPT0.T))
R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1) R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T) RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)

View file

@ -23,7 +23,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', M=10, def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, oldpsave=10, _debug=False, Z=None, kernel=None, oldpsave=10, _debug=False,
**kwargs): **kwargs):
if type(likelihood_or_Y) is np.ndarray: if type(likelihood_or_Y) is np.ndarray:
@ -39,7 +39,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1) X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
if Z is None: if Z is None:
Z = np.random.permutation(X.copy())[:M] Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
if kernel is None: if kernel is None:
@ -73,8 +73,8 @@ class BayesianGPLVM(SparseGP, GPLVM):
self._oldps.insert(0, p.copy()) self._oldps.insert(0, p.copy())
def _get_param_names(self): def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
return (X_names + S_names + SparseGP._get_param_names(self)) return (X_names + S_names + SparseGP._get_param_names(self))
def _get_params(self): def _get_params(self):
@ -96,7 +96,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
def _set_params(self, x, save_old=True, save_count=0): def _set_params(self, x, save_old=True, save_count=0):
# try: # try:
x = self._clipped(x) x = self._clipped(x)
N, input_dim = self.N, self.input_dim N, input_dim = self.num_data, self.input_dim
self.X = x[:self.X.size].reshape(N, input_dim).copy() self.X = x[:self.X.size].reshape(N, input_dim).copy()
self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy()
SparseGP._set_params(self, x[(2 * N * input_dim):]) SparseGP._set_params(self, x[(2 * N * input_dim):])
@ -126,7 +126,7 @@ class BayesianGPLVM(SparseGP, GPLVM):
def KL_divergence(self): def KL_divergence(self):
var_mean = np.square(self.X).sum() var_mean = np.square(self.X).sum()
var_S = np.sum(self.X_variance - np.log(self.X_variance)) var_S = np.sum(self.X_variance - np.log(self.X_variance))
return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data
def log_likelihood(self): def log_likelihood(self):
ll = SparseGP.log_likelihood(self) ll = SparseGP.log_likelihood(self)
@ -146,11 +146,11 @@ class BayesianGPLVM(SparseGP, GPLVM):
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]]) self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
# sf2 = self.scale_factor ** 2 # sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) A = -0.5 * self.num_data * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y)
# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) # B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A))
else: else:
A = -0.5 * self.N * self.input_dim * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT A = -0.5 * self.num_data * self.input_dim * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) # B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2)
B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A))
C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
@ -266,9 +266,9 @@ class BayesianGPLVM(SparseGP, GPLVM):
def _debug_filter_params(self, x): def _debug_filter_params(self, x):
start, end = 0, self.X.size, start, end = 0, self.X.size,
X = x[start:end].reshape(self.N, self.input_dim) X = x[start:end].reshape(self.num_data, self.input_dim)
start, end = end, end + self.X_variance.size start, end = end, end + self.X_variance.size
X_v = x[start:end].reshape(self.N, self.input_dim) X_v = x[start:end].reshape(self.num_data, self.input_dim)
start, end = end, end + (self.num_inducing * self.input_dim) start, end = end, end + (self.num_inducing * self.input_dim)
Z = x[start:end].reshape(self.num_inducing, self.input_dim) Z = x[start:end].reshape(self.num_inducing, self.input_dim)
start, end = end, end + self.input_dim start, end = end, end + self.input_dim

View file

@ -52,7 +52,7 @@ class FITC(SparseGP):
else: else:
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
assert self.likelihood.input_dim == 1 assert self.likelihood.input_dim == 1
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N))) tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp) self.A = tdot(tmp)
@ -108,7 +108,7 @@ class FITC(SparseGP):
self._dpsi1_dX_jkj = 0 self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0 self._dpsi1_dtheta_jkj = 0
for i, V_n, alpha_n, gamma_n, gamma_k in zip(range(self.N), self.V_star, alpha, gamma_2, gamma_3): for i, V_n, alpha_n, gamma_n, gamma_k in zip(range(self.num_data), self.V_star, alpha, gamma_2, gamma_3):
K_pp_K = np.dot(Kmmipsi1[:, i:(i + 1)], Kmmipsi1[:, i:(i + 1)].T) K_pp_K = np.dot(Kmmipsi1[:, i:(i + 1)], Kmmipsi1[:, i:(i + 1)].T)
# Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1 # Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1
@ -155,7 +155,7 @@ class FITC(SparseGP):
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) A = -0.5 * self.num_data * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D return A + C + D

View file

@ -16,7 +16,7 @@ class FITCClassification(FITC):
:param X: input observations :param X: input observations
:param Y: observed values :param Y: observed values
:param likelihood: a GPy likelihood, defaults to binomial with probit link_function :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function
:param kernel: a GPy kernel, defaults to rbf+white :param kernel: a GPy kernel, defaults to rbf+white
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True :type normalize_X: False|True

View file

@ -2,16 +2,9 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import pylab as pb from scipy import linalg
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot from GPy.core.sparse_gp import SparseGP
from ..util.plot import gpplot from GPy.util.linalg import mdot
from .. import kern
from scipy import stats, linalg
<<<<<<< HEAD:GPy/models/generalized_FITC.py
from sparse_GP import sparse_GP
=======
from ..core import SparseGP
>>>>>>> 7040b26f41f382edfdca3d3f7b689b9bbfc1a54f:GPy/models/generalized_fitc.py
def backsub_both_sides(L, X): def backsub_both_sides(L, X):
""" Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky"""
@ -45,17 +38,13 @@ class GeneralizedFITC(SparseGP):
self.num_inducing = self.Z.shape[0] self.num_inducing = self.Z.shape[0]
self.true_precision = likelihood.precision self.true_precision = likelihood.precision
<<<<<<< HEAD:GPy/models/generalized_FITC.py
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
=======
super(GeneralizedFITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X) super(GeneralizedFITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X)
self._set_params(self._get_params()) self._set_params(self._get_params())
>>>>>>> 7040b26f41f382edfdca3d3f7b689b9bbfc1a54f:GPy/models/generalized_fitc.py
def _set_params(self, p): def _set_params(self, p):
self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim) self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
self._compute_kernel_matrices() self._compute_kernel_matrices()
self._computations() self._computations()
self._FITC_computations() self._FITC_computations()
@ -104,7 +93,7 @@ class GeneralizedFITC(SparseGP):
self.P = Iplus_Dprod_i[:, None] * self.psi1.T self.P = Iplus_Dprod_i[:, None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi, self.psi1) self.RPT0 = np.dot(self.Lmi, self.psi1)
self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0, ((1. - Iplus_Dprod_i) / self.Diag0)[:, None] * self.RPT0.T)) self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0, ((1. - Iplus_Dprod_i) / self.Diag0)[:, None] * self.RPT0.T))
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) self.R, info = linalg.lapack.dtrtrs(self.L, self.Lmi, lower=1)
self.RPT = np.dot(self.R, self.P.T) self.RPT = np.dot(self.R, self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T, self.RPT) self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T, self.RPT)
self.w = self.Diag * self.likelihood.v_tilde self.w = self.Diag * self.likelihood.v_tilde
@ -112,9 +101,9 @@ class GeneralizedFITC(SparseGP):
self.mu = self.w + np.dot(self.P, self.Gamma) self.mu = self.w + np.dot(self.P, self.Gamma)
# Remove extra term from dL_dpsi1 # Remove extra term from dL_dpsi1
self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N)) self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1 * self.likelihood.precision.flatten().reshape(1,self.num_data))
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) #self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
#self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB #self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.num_data)) #dB
#########333333 #########333333
# self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) # self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
@ -142,7 +131,7 @@ class GeneralizedFITC(SparseGP):
else: else:
raise NotImplementedError, "homoscedastic derivatives not implemented" raise NotImplementedError, "homoscedastic derivatives not implemented"
#likelihood is not heterscedatic #likelihood is not heterscedatic
#self.partial_for_likelihood = - 0.5 * self.N*self.input_dim*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 #self.partial_for_likelihood = - 0.5 * self.num_data*self.input_dim*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
#self.partial_for_likelihood += 0.5 * self.input_dim * trace_dot(self.Bi,self.A)*self.likelihood.precision #self.partial_for_likelihood += 0.5 * self.input_dim * trace_dot(self.Bi,self.A)*self.likelihood.precision
#self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) #self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
#TODO partial derivative vector for the likelihood not implemented #TODO partial derivative vector for the likelihood not implemented
@ -164,9 +153,9 @@ class GeneralizedFITC(SparseGP):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor ** 2 sf2 = self.scale_factor ** 2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.input_dim*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) A = -0.5*self.num_data*self.input_dim*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
else: else:
A = -0.5*self.N*self.input_dim*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5*self.num_data*self.input_dim*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.num_inducing*np.log(sf2)) C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.num_inducing*np.log(sf2))
#C = -0.5*self.input_dim * (self.B_logdet + self.num_inducing*np.log(sf2)) #C = -0.5*self.input_dim * (self.B_logdet + self.num_inducing*np.log(sf2))
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V)) D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))

View file

@ -6,7 +6,7 @@ import numpy as np
import pylab as pb import pylab as pb
import sys, pdb import sys, pdb
from .. import kern from .. import kern
from ..core import model from ..core import Model
from ..util.linalg import pdinv, PCA from ..util.linalg import pdinv, PCA
from ..core import GP from ..core import GP
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
@ -42,13 +42,13 @@ class GPLVM(GP):
return np.random.randn(Y.shape[0], input_dim) return np.random.randn(Y.shape[0], input_dim)
def _get_param_names(self): def _get_param_names(self):
return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) + GP._get_param_names(self) return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.num_data)],[]) + GP._get_param_names(self)
def _get_params(self): def _get_params(self):
return np.hstack((self.X.flatten(), GP._get_params(self))) return np.hstack((self.X.flatten(), GP._get_params(self)))
def _set_params(self,x): def _set_params(self,x):
self.X = x[:self.N*self.input_dim].reshape(self.N,self.input_dim).copy() self.X = x[:self.num_data*self.input_dim].reshape(self.num_data,self.input_dim).copy()
GP._set_params(self, x[self.X.size:]) GP._set_params(self, x[self.X.size:])
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):

View file

@ -3,7 +3,7 @@ Created on 10 Apr 2013
@author: Max Zwiessele @author: Max Zwiessele
''' '''
from GPy.core import model from GPy.core import Model
from GPy.core import SparseGP from GPy.core import SparseGP
from GPy.util.linalg import PCA from GPy.util.linalg import PCA
import numpy import numpy
@ -12,7 +12,7 @@ import pylab
from GPy.kern.kern import kern from GPy.kern.kern import kern
from GPy.models.bayesian_gplvm import BayesianGPLVM from GPy.models.bayesian_gplvm import BayesianGPLVM
class MRD(model): class MRD(Model):
""" """
Do MRD on given Datasets in Ylist. Do MRD on given Datasets in Ylist.
All Ys in likelihood_list are in [N x Dn], where Dn can be different per Yn, All Ys in likelihood_list are in [N x Dn], where Dn can be different per Yn,
@ -44,7 +44,7 @@ class MRD(model):
:param kernels: list of kernels or kernel shared for all BGPLVMS :param kernels: list of kernels or kernel shared for all BGPLVMS
:type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default) :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default)
""" """
def __init__(self, likelihood_or_Y_list, input_dim, M=10, names=None, def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None,
kernels=None, initx='PCA', kernels=None, initx='PCA',
initz='permute', _debug=False, **kw): initz='permute', _debug=False, **kw):
if names is None: if names is None:
@ -61,24 +61,24 @@ class MRD(model):
assert not ('kernel' in kw), "pass kernels through `kernels` argument" assert not ('kernel' in kw), "pass kernels through `kernels` argument"
self.input_dim = input_dim self.input_dim = input_dim
self.num_inducing = M self.num_inducing = num_inducing
self._debug = _debug self._debug = _debug
self._init = True self._init = True
X = self._init_X(initx, likelihood_or_Y_list) X = self._init_X(initx, likelihood_or_Y_list)
Z = self._init_Z(initz, X) Z = self._init_Z(initz, X)
self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, num_inducing=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)]
del self._init del self._init
self.gref = self.bgplvms[0] self.gref = self.bgplvms[0]
nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms]) nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms])
self.nparams = nparams.cumsum() self.nparams = nparams.cumsum()
self.N = self.gref.N self.num_data = self.gref.num_data
self.NQ = self.N * self.input_dim self.NQ = self.num_data * self.input_dim
self.MQ = self.num_inducing * self.input_dim self.MQ = self.num_inducing * self.input_dim
model.__init__(self) # @UndefinedVariable Model.__init__(self) # @UndefinedVariable
self._set_params(self._get_params()) self._set_params(self._get_params())
@property @property
@ -142,8 +142,8 @@ class MRD(model):
self._init_Z(initz, self.X) self._init_Z(initz, self.X)
def _get_param_names(self): def _get_param_names(self):
# X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
# S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
n1 = self.gref._get_param_names() n1 = self.gref._get_param_names()
n1var = n1[:self.NQ * 2 + self.MQ] n1var = n1[:self.NQ * 2 + self.MQ]
map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x), map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x),
@ -169,8 +169,8 @@ class MRD(model):
return params return params
# def _set_var_params(self, g, X, X_var, Z): # def _set_var_params(self, g, X, X_var, Z):
# g.X = X.reshape(self.N, self.input_dim) # g.X = X.reshape(self.num_data, self.input_dim)
# g.X_variance = X_var.reshape(self.N, self.input_dim) # g.X_variance = X_var.reshape(self.num_data, self.input_dim)
# g.Z = Z.reshape(self.num_inducing, self.input_dim) # g.Z = Z.reshape(self.num_inducing, self.input_dim)
# #
# def _set_kern_params(self, g, p): # def _set_kern_params(self, g, p):

View file

@ -26,7 +26,7 @@ class SparseGPClassification(SparseGP):
""" """
def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10): def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
if kernel is None: if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
@ -38,7 +38,7 @@ class SparseGPClassification(SparseGP):
raise Warning, 'likelihood.data and Y are different.' raise Warning, 'likelihood.data and Y are different.'
if Z is None: if Z is None:
i = np.random.permutation(X.shape[0])[:M] i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy() Z = X[i].copy()
else: else:
assert Z.shape[1]==X.shape[1] assert Z.shape[1]==X.shape[1]

View file

@ -26,14 +26,14 @@ class SparseGPRegression(SparseGP):
""" """
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None): def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10, X_variance=None):
# kern defaults to rbf (plus white for stability) # kern defaults to rbf (plus white for stability)
if kernel is None: if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3) kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3)
# Z defaults to a subset of the data # Z defaults to a subset of the data
if Z is None: if Z is None:
i = np.random.permutation(X.shape[0])[:M] i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy() Z = X[i].copy()
else: else:
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]

View file

@ -23,19 +23,19 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10): def __init__(self, Y, input_dim, kernel=None, init='PCA', num_inducing=10):
X = self.initialise_latent(init, input_dim, Y) X = self.initialise_latent(init, input_dim, Y)
SparseGPRegression.__init__(self, X, Y, kernel=kernel, M=M) SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
def _get_param_names(self): def _get_param_names(self):
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], [])
+ SparseGPRegression._get_param_names(self)) + SparseGPRegression._get_param_names(self))
def _get_params(self): def _get_params(self):
return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self))) return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self)))
def _set_params(self, x): def _set_params(self, x):
self.X = x[:self.X.size].reshape(self.N, self.input_dim).copy() self.X = x[:self.X.size].reshape(self.num_data, self.input_dim).copy()
SparseGPRegression._set_params(self, x[self.X.size:]) SparseGPRegression._set_params(self, x[self.X.size:])
def log_likelihood(self): def log_likelihood(self):

View file

@ -8,67 +8,67 @@ from GPy.models.bayesian_gplvm import BayesianGPLVM
class BGPLVMTests(unittest.TestCase): class BGPLVMTests(unittest.TestCase):
def test_bias_kern(self): def test_bias_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_linear_kern(self): def test_linear_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_rbf_kern(self): def test_rbf_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_rbf_bias_kern(self): def test_rbf_bias_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
#@unittest.skip('psi2 cross terms are NotImplemented for this combination') #@unittest.skip('psi2 cross terms are NotImplemented for this combination')
def test_linear_bias_kern(self): def test_linear_bias_kern(self):
N, M, input_dim, D = 30, 5, 4, 30 N, num_inducing, input_dim, D = 30, 5, 4, 30
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
Y -= Y.mean(axis=0) Y -= Y.mean(axis=0)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -12,31 +12,31 @@ from nose.tools import nottest
import sys import sys
class ExamplesTests(unittest.TestCase): class ExamplesTests(unittest.TestCase):
def _checkgrad(self, model): def _checkgrad(self, Model):
self.assertTrue(model.checkgrad()) self.assertTrue(Model.checkgrad())
def _model_instance(self, model): def _model_instance(self, Model):
self.assertTrue(isinstance(model, GPy.models)) self.assertTrue(isinstance(Model, GPy.models))
""" """
def model_instance_generator(model): def model_instance_generator(Model):
def check_model_returned(self): def check_model_returned(self):
self._model_instance(model) self._model_instance(Model)
return check_model_returned return check_model_returned
def checkgrads_generator(model): def checkgrads_generator(Model):
def model_checkgrads(self): def model_checkgrads(self):
self._checkgrad(model) self._checkgrad(Model)
return model_checkgrads return model_checkgrads
""" """
def model_checkgrads(model): def model_checkgrads(Model):
model.randomize() Model.randomize()
assert model.checkgrad() assert Model.checkgrad()
def model_instance(model): def model_instance(Model):
assert isinstance(model, GPy.core.model) assert isinstance(Model, GPy.core.Model)
@nottest @nottest
def test_models(): def test_models():
@ -57,25 +57,25 @@ def test_models():
continue continue
print "Testing example: ", example[0] print "Testing example: ", example[0]
# Generate model # Generate Model
model = example[1]() Model = example[1]()
print model print Model
# Create tests for instance check # Create tests for instance check
""" """
test = model_instance_generator(model) test = model_instance_generator(Model)
test.__name__ = 'test_instance_%s' % example[0] test.__name__ = 'test_instance_%s' % example[0]
setattr(ExamplesTests, test.__name__, test) setattr(ExamplesTests, test.__name__, test)
#Create tests for checkgrads check #Create tests for checkgrads check
test = checkgrads_generator(model) test = checkgrads_generator(Model)
test.__name__ = 'test_checkgrads_%s' % example[0] test.__name__ = 'test_checkgrads_%s' % example[0]
setattr(ExamplesTests, test.__name__, test) setattr(ExamplesTests, test.__name__, test)
""" """
model_checkgrads.description = 'test_checkgrads_%s' % example[0] model_checkgrads.description = 'test_checkgrads_%s' % example[0]
yield model_checkgrads, model yield model_checkgrads, Model
model_instance.description = 'test_instance_%s' % example[0] model_instance.description = 'test_instance_%s' % example[0]
yield model_instance, model yield model_instance, Model
if __name__ == "__main__": if __name__ == "__main__":
print "Running unit tests, please be (very) patient..." print "Running unit tests, please be (very) patient..."

View file

@ -7,7 +7,7 @@ import GPy
class GPLVMTests(unittest.TestCase): class GPLVMTests(unittest.TestCase):
def test_bias_kern(self): def test_bias_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
@ -19,7 +19,7 @@ class GPLVMTests(unittest.TestCase):
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_linear_kern(self): def test_linear_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
@ -31,7 +31,7 @@ class GPLVMTests(unittest.TestCase):
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_rbf_kern(self): def test_rbf_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)

View file

@ -21,7 +21,7 @@ class KernelTests(unittest.TestCase):
""" """
X = np.random.rand(30, 4) X = np.random.rand(30, 4)
K = np.dot(X, X.T) K = np.dot(X, X.T)
kernel = GPy.kern.fixed(4, K) kernel = GPy.kern.Fixed(4, K)
Y = np.ones((30,1)) Y = np.ones((30,1))
m = GPy.models.GPRegression(X,Y,kernel=kernel) m = GPy.models.GPRegression(X,Y,kernel=kernel)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -14,7 +14,7 @@ class MRDTests(unittest.TestCase):
def test_gradients(self): def test_gradients(self):
num_m = 3 num_m = 3
N, M, input_dim, D = 20, 8, 6, 20 N, num_inducing, input_dim, D = 20, 8, 6, 20
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
@ -23,7 +23,7 @@ class MRDTests(unittest.TestCase):
Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)] Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)]
likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist] likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist]
m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, M=M) m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -8,10 +8,10 @@ import numpy
import GPy import GPy
import itertools import itertools
from GPy.core import model from GPy.core import Model
class PsiStatModel(model): class PsiStatModel(Model):
def __init__(self, which, X, X_variance, Z, M, kernel): def __init__(self, which, X, X_variance, Z, num_inducing, kernel):
self.which = which self.which = which
self.X = X self.X = X
self.X_variance = X_variance self.X_variance = X_variance
@ -64,8 +64,8 @@ class DPsiStatTest(unittest.TestCase):
def testPsi0(self): def testPsi0(self):
for k in self.kernels: for k in self.kernels:
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
try: try:
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
except: except:
@ -74,33 +74,33 @@ class DPsiStatTest(unittest.TestCase):
# def testPsi1(self): # def testPsi1(self):
# for k in self.kernels: # for k in self.kernels:
# m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, # m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
# M=self.M, kernel=k) # num_inducing=self.num_inducing, kernel=k)
# assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_lin(self): def testPsi2_lin(self):
k = self.kernels[0] k = self.kernels[0]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_lin_bia(self): def testPsi2_lin_bia(self):
k = self.kernels[3] k = self.kernels[3]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_rbf(self): def testPsi2_rbf(self):
k = self.kernels[1] k = self.kernels[1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_rbf_bia(self): def testPsi2_rbf_bia(self):
k = self.kernels[-1] k = self.kernels[-1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_bia(self): def testPsi2_bia(self):
k = self.kernels[2] k = self.kernels[2]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.num_inducing, kernel=k) num_inducing=self.num_inducing, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
@ -122,11 +122,11 @@ if __name__ == "__main__":
numpy.random.seed(0) numpy.random.seed(0)
input_dim = 5 input_dim = 5
N = 50 N = 50
M = 10 num_inducing = 10
D = 15 D = 15
X = numpy.random.randn(N, input_dim) X = numpy.random.randn(N, input_dim)
X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M] Z = numpy.random.permutation(X)[:num_inducing]
Y = X.dot(numpy.random.randn(input_dim, D)) Y = X.dot(numpy.random.randn(input_dim, D))
# kernel = GPy.kern.bias(input_dim) # kernel = GPy.kern.bias(input_dim)
# #
@ -148,7 +148,7 @@ if __name__ == "__main__":
# m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)) # num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim))
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)))
m3.ensure_default_constraints() m3.ensure_default_constraints()
# + GPy.kern.bias(input_dim)) # + GPy.kern.bias(input_dim))
# m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,

View file

@ -8,38 +8,38 @@ from GPy.models.sparse_gplvm import SparseGPLVM
class sparse_GPLVMTests(unittest.TestCase): class sparse_GPLVMTests(unittest.TestCase):
def test_bias_kern(self): def test_bias_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = SparseGPLVM(Y, input_dim, kernel=k, M=M) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@unittest.skip('linear kernels do not have dKdiag_dX') @unittest.skip('linear kernels do not have dKdiag_dX')
def test_linear_kern(self): def test_linear_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = SparseGPLVM(Y, input_dim, kernel=k, M=M) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_rbf_kern(self): def test_rbf_kern(self):
N, M, input_dim, D = 10, 3, 2, 4 N, num_inducing, input_dim, D = 10, 3, 2, 4
X = np.random.rand(N, input_dim) X = np.random.rand(N, input_dim)
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T
k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001)
m = SparseGPLVM(Y, input_dim, kernel=k, M=M) m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
m.ensure_default_constraints() m.ensure_default_constraints()
m.randomize() m.randomize()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())

View file

@ -196,9 +196,9 @@ class GradientTests(unittest.TestCase):
k = GPy.kern.rbf(1) + GPy.kern.white(1) k = GPy.kern.rbf(1) + GPy.kern.white(1)
Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None]
distribution = GPy.likelihoods.likelihood_functions.binomial() distribution = GPy.likelihoods.likelihood_functions.Binomial()
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
#likelihood = GPy.inference.likelihoods.binomial(Y) #likelihood = GPy.inference.likelihoods.Binomial(Y)
m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4) m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4)
m.constrain_positive('(var|len)') m.constrain_positive('(var|len)')
m.approximate_likelihood() m.approximate_likelihood()

View file

@ -2,9 +2,9 @@ import pylab as pb
import numpy as np import numpy as np
from .. import util from .. import util
def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40): def plot_latent(Model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40):
""" """
:param labels: a np.array of size model.N containing labels for the points (can be number, strings, etc) :param labels: a np.array of size Model.N containing labels for the points (can be number, strings, etc)
:param resolution: the resolution of the grid on which to evaluate the predictive variance :param resolution: the resolution of the grid on which to evaluate the predictive variance
""" """
if ax is None: if ax is None:
@ -12,26 +12,26 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None,
util.plot.Tango.reset() util.plot.Tango.reset()
if labels is None: if labels is None:
labels = np.ones(model.N) labels = np.ones(Model.N)
if which_indices is None: if which_indices is None:
if model.input_dim==1: if Model.input_dim==1:
input_1 = 0 input_1 = 0
input_2 = None input_2 = None
if model.input_dim==2: if Model.input_dim==2:
input_1, input_2 = 0,1 input_1, input_2 = 0,1
else: else:
try: try:
input_1, input_2 = np.argsort(model.input_sensitivity())[:2] input_1, input_2 = np.argsort(Model.input_sensitivity())[:2]
except: except:
raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'"
else: else:
input_1, input_2 = which_indices input_1, input_2 = which_indices
#first, plot the output variance as a function of the latent space #first, plot the output variance as a function of the latent space
Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(model.X[:,[input_1, input_2]],resolution=resolution) Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(Model.X[:,[input_1, input_2]],resolution=resolution)
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) Xtest_full = np.zeros((Xtest.shape[0], Model.X.shape[1]))
Xtest_full[:, :2] = Xtest Xtest_full[:, :2] = Xtest
mu, var, low, up = model.predict(Xtest_full) mu, var, low, up = Model.predict(Xtest_full)
var = var[:, :1] var = var[:, :1]
ax.imshow(var.reshape(resolution, resolution).T, ax.imshow(var.reshape(resolution, resolution).T,
extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower') extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
@ -55,12 +55,12 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None,
m = marker m = marker
index = np.nonzero(labels==ul)[0] index = np.nonzero(labels==ul)[0]
if model.input_dim==1: if Model.input_dim==1:
x = model.X[index,input_1] x = Model.X[index,input_1]
y = np.zeros(index.size) y = np.zeros(index.size)
else: else:
x = model.X[index,input_1] x = Model.X[index,input_1]
y = model.X[index,input_2] y = Model.X[index,input_2]
ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i'%input_1) ax.set_xlabel('latent dimension %i'%input_1)
@ -76,16 +76,16 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None,
return ax return ax
def plot_latent_indices(model, which_indices=None, *args, **kwargs): def plot_latent_indices(Model, which_indices=None, *args, **kwargs):
if which_indices is None: if which_indices is None:
try: try:
input_1, input_2 = np.argsort(model.input_sensitivity())[:2] input_1, input_2 = np.argsort(Model.input_sensitivity())[:2]
except: except:
raise ValueError, "cannot Automatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot Automatically determine which dimensions to plot, please pass 'which_indices'"
else: else:
input_1, input_2 = which_indices input_1, input_2 = which_indices
ax = plot_latent(model, which_indices=[input_1, input_2], *args, **kwargs) ax = plot_latent(Model, which_indices=[input_1, input_2], *args, **kwargs)
# TODO: Here test if there are inducing points... # TODO: Here test if there are inducing points...
ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w') ax.plot(Model.Z[:, input_1], Model.Z[:, input_2], '^w')
return ax return ax

View file

@ -43,16 +43,16 @@ class vector_show(data_show):
class lvm(data_show): class lvm(data_show):
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]): def __init__(self, vals, Model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]):
"""Visualize a latent variable model """Visualize a latent variable Model
:param model: the latent variable model to visualize. :param Model: the latent variable Model to visualize.
:param data_visualize: the object used to visualize the data which has been modelled. :param data_visualize: the object used to visualize the data which has been modelled.
:type data_visualize: visualize.data_show type. :type data_visualize: visualize.data_show type.
:param latent_axes: the axes where the latent visualization should be plotted. :param latent_axes: the axes where the latent visualization should be plotted.
""" """
if vals == None: if vals == None:
vals = model.X[0] vals = Model.X[0]
data_show.__init__(self, vals, axes=latent_axes) data_show.__init__(self, vals, axes=latent_axes)
@ -68,13 +68,13 @@ class lvm(data_show):
self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_enter_event', self.on_enter) self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_enter_event', self.on_enter)
self.data_visualize = data_visualize self.data_visualize = data_visualize
self.model = model self.Model = Model
self.latent_axes = latent_axes self.latent_axes = latent_axes
self.sense_axes = sense_axes self.sense_axes = sense_axes
self.called = False self.called = False
self.move_on = False self.move_on = False
self.latent_index = latent_index self.latent_index = latent_index
self.latent_dim = model.input_dim self.latent_dim = Model.input_dim
# The red cross which shows current latent point. # The red cross which shows current latent point.
self.latent_values = vals self.latent_values = vals
@ -85,7 +85,7 @@ class lvm(data_show):
def modify(self, vals): def modify(self, vals):
"""When latent values are modified update the latent representation and ulso update the output visualization.""" """When latent values are modified update the latent representation and ulso update the output visualization."""
self.vals = vals.copy() self.vals = vals.copy()
y = self.model.predict(self.vals)[0] y = self.Model.predict(self.vals)[0]
self.data_visualize.modify(y) self.data_visualize.modify(y)
self.latent_handle.set_data(self.vals[self.latent_index[0]], self.vals[self.latent_index[1]]) self.latent_handle.set_data(self.vals[self.latent_index[0]], self.vals[self.latent_index[1]])
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()
@ -113,15 +113,15 @@ class lvm(data_show):
# A click in the bar chart axis for selection a dimension. # A click in the bar chart axis for selection a dimension.
if self.sense_axes != None: if self.sense_axes != None:
self.sense_axes.cla() self.sense_axes.cla()
self.sense_axes.bar(np.arange(self.model.input_dim),1./self.model.input_sensitivity(),color='b') self.sense_axes.bar(np.arange(self.Model.input_dim),1./self.Model.input_sensitivity(),color='b')
if self.latent_index[1] == self.latent_index[0]: if self.latent_index[1] == self.latent_index[0]:
self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='y') self.sense_axes.bar(np.array(self.latent_index[0]),1./self.Model.input_sensitivity()[self.latent_index[0]],color='y')
self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='y') self.sense_axes.bar(np.array(self.latent_index[1]),1./self.Model.input_sensitivity()[self.latent_index[1]],color='y')
else: else:
self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='g') self.sense_axes.bar(np.array(self.latent_index[0]),1./self.Model.input_sensitivity()[self.latent_index[0]],color='g')
self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='r') self.sense_axes.bar(np.array(self.latent_index[1]),1./self.Model.input_sensitivity()[self.latent_index[1]],color='r')
self.sense_axes.figure.canvas.draw() self.sense_axes.figure.canvas.draw()
@ -131,21 +131,21 @@ class lvm_subplots(lvm):
latent_axes is a np array of dimension np.ceil(input_dim/2), latent_axes is a np array of dimension np.ceil(input_dim/2),
one for each pair of the latent dimensions. one for each pair of the latent dimensions.
""" """
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None): def __init__(self, vals, Model, data_visualize, latent_axes=None, sense_axes=None):
self.nplots = int(np.ceil(model.input_dim/2.))+1 self.nplots = int(np.ceil(Model.input_dim/2.))+1
assert len(latent_axes)==self.nplots assert len(latent_axes)==self.nplots
if vals==None: if vals==None:
vals = model.X[0, :] vals = Model.X[0, :]
self.latent_values = vals self.latent_values = vals
for i, axis in enumerate(latent_axes): for i, axis in enumerate(latent_axes):
if i == self.nplots-1: if i == self.nplots-1:
if self.nplots*2!=model.input_dim: if self.nplots*2!=Model.input_dim:
latent_index = [i*2, i*2] latent_index = [i*2, i*2]
lvm.__init__(self, self.latent_vals, model, data_visualize, axis, sense_axes, latent_index=latent_index) lvm.__init__(self, self.latent_vals, Model, data_visualize, axis, sense_axes, latent_index=latent_index)
else: else:
latent_index = [i*2, i*2+1] latent_index = [i*2, i*2+1]
lvm.__init__(self, self.latent_vals, model, data_visualize, axis, latent_index=latent_index) lvm.__init__(self, self.latent_vals, Model, data_visualize, axis, latent_index=latent_index)
@ -158,7 +158,7 @@ class lvm_dimselect(lvm):
GPy.examples.dimensionality_reduction.BGPVLM_oil() GPy.examples.dimensionality_reduction.BGPVLM_oil()
""" """
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]): def __init__(self, vals, Model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]):
if latent_axes==None and sense_axes==None: if latent_axes==None and sense_axes==None:
self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2) self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2)
elif sense_axes==None: elif sense_axes==None:
@ -167,14 +167,14 @@ class lvm_dimselect(lvm):
else: else:
self.sense_axes = sense_axes self.sense_axes = sense_axes
lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index) lvm.__init__(self,vals,Model,data_visualize,latent_axes,sense_axes,latent_index)
print "use left and right mouse butons to select dimensions" print "use left and right mouse butons to select dimensions"
def on_click(self, event): def on_click(self, event):
if event.inaxes==self.sense_axes: if event.inaxes==self.sense_axes:
new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1)) new_index = max(0,min(int(np.round(event.xdata-0.5)),self.Model.input_dim-1))
if event.button == 1: if event.button == 1:
# Make it red if and y-axis (red=port=left) if it is a left button click # Make it red if and y-axis (red=port=left) if it is a left button click
self.latent_index[1] = new_index self.latent_index[1] = new_index
@ -185,7 +185,7 @@ class lvm_dimselect(lvm):
self.show_sensitivities() self.show_sensitivities()
self.latent_axes.cla() self.latent_axes.cla()
self.model.plot_latent(which_indices=self.latent_index, self.Model.plot_latent(which_indices=self.latent_index,
ax=self.latent_axes) ax=self.latent_axes)
self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0] self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
self.modify(self.latent_values) self.modify(self.latent_values)
@ -199,7 +199,7 @@ class lvm_dimselect(lvm):
def on_leave(self,event): def on_leave(self,event):
latent_values = self.latent_values.copy() latent_values = self.latent_values.copy()
y = self.model.predict(latent_values[None,:])[0] y = self.Model.predict(latent_values[None,:])[0]
self.data_visualize.modify(y) self.data_visualize.modify(y)