Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-09 03:52:39 +02:00

Commit 847fdef9c8: merged in kern changes

54 changed files with 914 additions and 1280 deletions

@@ -43,7 +43,8 @@ class GP(Model):
         else:
             self.Y_metadata = None

-        assert isinstance(kernel, kern.kern)
+        assert isinstance(kernel, kern.Kern)
+        assert self.input_dim == kernel.input_dim
         self.kern = kernel

         assert isinstance(likelihood, likelihoods.Likelihood)

@@ -70,7 +71,7 @@ class GP(Model):
     def log_likelihood(self):
         return self._log_marginal_likelihood

-    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
+    def _raw_predict(self, _Xnew, full_cov=False):
         """
         Internal helper function for making predictions, does not account
         for normalization or likelihood

@@ -80,29 +81,27 @@ class GP(Model):
           diagonal of the covariance is returned.

         """
-        Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
+        Kx = self.kern.K(_Xnew, self.X).T
+        #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
+        WiKx = np.dot(self.posterior.woodbury_inv, Kx)
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
-            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+            Kxx = self.kern.K(_Xnew)
+            #var = Kxx - tdot(LiKx.T)
+            var = np.dot(Kx.T, WiKx)
         else:
-            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+            Kxx = self.kern.Kdiag(_Xnew)
+            #var = Kxx - np.sum(LiKx*LiKx, 0)
+            var = Kxx - np.sum(WiKx*Kx, 0)
+            var = var.reshape(-1, 1)
         return mu, var

-    def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
+    def predict(self, Xnew, full_cov=False, **likelihood_args):
         """
         Predict the function(s) at the new point(s) Xnew.

         :param Xnew: The points at which to make a prediction
         :type Xnew: np.ndarray, Nnew x self.input_dim
-        :param which_parts: specifies which outputs kernel(s) to use in prediction
-        :type which_parts: ('all', list of bools)
         :param full_cov: whether to return the full covariance matrix, or just
           the diagonal
         :type full_cov: bool

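For reference, _raw_predict above is the standard GP posterior written in terms of the precomputed Woodbury quantities: mu = Kx' a and var = Kxx - Kx' W^-1 Kx, where a is the Woodbury vector and W^-1 the Woodbury inverse. A minimal numpy sketch of the same computation (kern, woodbury_inv and woodbury_vector assumed given):

    import numpy as np

    def raw_predict_sketch(kern, X, Xnew, woodbury_inv, woodbury_vector, full_cov=False):
        Kx = kern.K(Xnew, X).T                       # (N x Nnew) cross-covariance
        mu = np.dot(Kx.T, woodbury_vector)           # posterior mean Kx' a
        WiKx = np.dot(woodbury_inv, Kx)              # W^-1 Kx
        if full_cov:
            var = kern.K(Xnew) - np.dot(Kx.T, WiKx)  # Kxx - Kx' W^-1 Kx
        else:
            var = (kern.Kdiag(Xnew) - np.sum(WiKx * Kx, 0)).reshape(-1, 1)
        return mu, var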
@@ -118,13 +117,13 @@ class GP(Model):

         """
         #predict the latent function values
-        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov)

         # now push through likelihood
         mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
         return mean, var, _025pm, _975pm

-    def posterior_samples_f(self, X, size=10, which_parts='all', full_cov=True):
+    def posterior_samples_f(self, X, size=10, full_cov=True):
         """
         Samples the posterior GP at the points X.

@@ -132,13 +131,11 @@ class GP(Model):
         :type X: np.ndarray, Nnew x self.input_dim.
         :param size: the number of a posteriori samples.
         :type size: int.
-        :param which_parts: which of the kernel functions to use (additively).
-        :type which_parts: 'all', or list of bools.
         :param full_cov: whether to return the full covariance matrix, or just the diagonal.
         :type full_cov: bool.
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
-        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov)
+        m, v = self._raw_predict(X, full_cov=full_cov)
         v = v.reshape(m.size,-1) if len(v.shape)==3 else v
         if not full_cov:
             Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T

@@ -147,7 +144,7 @@ class GP(Model):

         return Ysim

-    def posterior_samples(self, X, size=10, which_parts='all', full_cov=True, noise_model=None):
+    def posterior_samples(self, X, size=10, full_cov=True, noise_model=None):
         """
         Samples the posterior GP at the points X.

@@ -155,15 +152,13 @@ class GP(Model):
         :type X: np.ndarray, Nnew x self.input_dim.
         :param size: the number of a posteriori samples.
         :type size: int.
-        :param which_parts: which of the kernel functions to use (additively).
-        :type which_parts: 'all', or list of bools.
         :param full_cov: whether to return the full covariance matrix, or just the diagonal.
         :type full_cov: bool.
         :param noise_model: for mixed noise likelihood, the noise model to use in the samples.
         :type noise_model: integer.
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
-        Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov)
+        Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
         if isinstance(self.likelihood, Gaussian):
             noise_std = np.sqrt(self.likelihood._get_params())
             Ysim += np.random.normal(0, noise_std, Ysim.shape)

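A short usage sketch of the simplified API after this change (GPRegression construction assumed; which_parts is gone from all three calls):

    import numpy as np
    import GPy

    X = np.random.uniform(-3., 3., (20, 1))
    Y = np.sin(X) + np.random.randn(20, 1) * 0.05
    m = GPy.models.GPRegression(X, Y)

    Xnew = np.linspace(-3., 3., 50)[:, None]
    mean, var, lower, upper = m.predict(Xnew)       # pushes through the likelihood
    fsamples = m.posterior_samples_f(Xnew, size=5)  # noise-free latent draws
    ysamples = m.posterior_samples(Xnew, size=5)    # draws with Gaussian noise added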
@@ -23,7 +23,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable)
     :param input_array: array which this parameter handles
     :type input_array: numpy.ndarray
     :param default_constraint: The default constraint for this parameter
     :type default_constraint:

     You can add/remove constraints by calling constrain on the parameter itself, e.g.:

@@ -59,7 +59,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable)

     def __init__(self, name, input_array, default_constraint=None):
         super(Param, self).__init__(name=name, default_constraint=default_constraint)


     def __array_finalize__(self, obj):
         # see InfoArray.__array_finalize__ for comments
         if obj is None: return

@@ -206,7 +206,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable)
                 return numpy.r_[a]
             return numpy.r_[:b]
         return itertools.imap(f, itertools.izip_longest(slice_index[:self._realndim_], self._realshape_, fillvalue=slice(self.size)))

     #===========================================================================
     # Convenience
     #===========================================================================

@@ -271,7 +271,7 @@ class Param(ObservableArray, Constrainable, Gradcheckable, Indexable, Parentable)
         clean_curr_slice = [s for s in slice_index if numpy.any(s != Ellipsis)]
         for i in range(self._realndim_-len(clean_curr_slice)):
             i += len(clean_curr_slice)
             clean_curr_slice += range(self._realshape_[i])
         if (all(isinstance(n, (numpy.ndarray, list, tuple)) for n in clean_curr_slice)
             and len(set(map(len, clean_curr_slice))) <= 1):
             return numpy.fromiter(itertools.izip(*clean_curr_slice),

@@ -437,4 +437,4 @@ class ParamConcatenation(object):
             start = False
         return "\n".join(strings)
     def __repr__(self):
         return "\n".join(map(repr, self.params))

@@ -33,12 +33,12 @@ class SparseGP(GP):

     def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, X_variance=None, name='sparse gp'):

-        # pick a sensible inference method
+        #pick a sensible inference method
         if inference_method is None:
             if isinstance(likelihood, likelihoods.Gaussian):
                 inference_method = var_dtc.VarDTC()
             else:
-                # inference_method = ??
+                #inference_method = ??
                 raise NotImplementedError, "what to do what to do?"
             print "defaulting to ", inference_method, "for latent function inference"

@@ -53,38 +53,35 @@ class SparseGP(GP):
         self.add_parameter(self.Z, index=0)
         self.parameters_changed()

-    def _update_gradients_Z(self, add=False):
-        # The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
+    def _gradients_Z(self):
+        #The derivative of the bound wrt the inducing inputs Z ( unless they're all fixed)
         if not self.Z.is_fixed:
-            if add: self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
-            else: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
             if self.X_variance is None:
-                self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
+                self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
             else:
-                self.Z.gradient += self.kern.dpsi1_dZ(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
-                self.Z.gradient += self.kern.dpsi2_dZ(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
+                self.Z.gradient = self.kern.gradients_Z_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict)
+        print self.Z.gradient
+        print id(self.Z)

     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
-        self._update_gradients_Z(add=False)
+        self.Z.gradient = self._gradients_Z()

-    def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
+    def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
         Make a prediction for the latent function values
         """
         if X_variance_new is None:
-            Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
+            Kx = self.kern.K(self.Z, Xnew)
             mu = np.dot(Kx.T, self.posterior.woodbury_vector)
             if full_cov:
-                Kxx = self.kern.K(Xnew, which_parts=which_parts)
-                var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx), Kx, [1,0]).T
+                Kxx = self.kern.K(Xnew)
+                var = Kxx - mdot(Kx.T, self.posterior.woodbury_inv, Kx)
             else:
-                Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts)[:, None]
-                #import ipdb;ipdb.set_trace()
-                var = Kxx - (np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T * Kx.T[:,:,None]).sum(1)
+                Kxx = self.kern.Kdiag(Xnew)
+                var = Kxx - np.sum(Kx * np.dot(self.posterior.woodbury_inv, Kx), 0)
         else:
-            # assert which_parts=='all', "swithching out parts of variational kernels is not implemented"
-            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts
+            Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)
             mu = np.dot(Kx, self.Cpsi1V)
             if full_cov:
                 raise NotImplementedError, "TODO"

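The sparse branch applies the same Woodbury identities through the inducing inputs Z: mu = Kzx' a and var = Kxx - Kzx' W^-1 Kzx. A dense-algebra sketch of the full_cov branch (mdot in the diff is assumed to be GPy's chained-np.dot helper):

    import numpy as np

    def sparse_raw_predict_sketch(kern, Z, Xnew, woodbury_inv, woodbury_vector):
        Kx = kern.K(Z, Xnew)                 # (M x Nnew) inducing/test cross-covariance
        mu = np.dot(Kx.T, woodbury_vector)   # posterior mean
        var = kern.K(Xnew) - np.dot(Kx.T, np.dot(woodbury_inv, Kx))
        return mu, var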
@@ -92,7 +89,7 @@ class SparseGP(GP):
             Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new)
             psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new)
             var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
-        return mu, var
+        return mu, var[:,None]


     def _getstate(self):

@@ -22,18 +22,18 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
     # generate GPLVM-like data
     X = _np.random.rand(num_inputs, input_dim)
     lengthscales = _np.random.rand(input_dim)
-    k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True)
+    k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
          #+ GPy.kern.white(input_dim, 0.01)
          )
     K = k.K(X)
     Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T

-    # k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
-    k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
-    # k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
-    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
-    # k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0)
-    # k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
+    # k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
+    #k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
+    # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
+    # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0)
+    # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)

     p = .3

@@ -73,7 +73,7 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):
     data = GPy.util.datasets.oil_100()
     Y = data['X']
     # create simple GP model
-    kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
+    kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.bias(6)
     m = GPy.models.GPLVM(Y, 6, kernel=kernel)
     m.data_labels = data['Y'].argmax(axis=1)
     if optimize: m.optimize('scg', messages=verbose)

@@ -88,7 +88,7 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
     Y = Y - Y.mean(0)
     Y /= Y.std(0)
     # Create the model
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q)
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q)
     m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
     m.data_labels = data['Y'][:N].argmax(axis=1)

@@ -138,7 +138,7 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
                (1 - var))) + .001
     Z = _np.random.permutation(X)[:num_inducing]

-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))

     m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
     m.data_colors = c

@@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
     _np.random.seed(0)
     data = GPy.util.datasets.oil()

-    kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
     Y = data['X'][:N]
     Yn = Gaussian(Y, normalize=True)
     m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)

@@ -439,7 +439,7 @@ def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):

     data = GPy.util.datasets.osu_run1()
     # optimize
-    back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.)
+    back_kernel=GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
     mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
     m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
     if optimize: m.optimize(messages=verbose, max_f_eval=10000)

@@ -474,7 +474,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):

     data = GPy.util.datasets.osu_run1()
     Q = 6
-    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
+    kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2))
     m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
     # optimize
     m.ensure_default_constraints()

@@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True):
     Y = np.vstack((Y1, Y2))

     #build the kernel
-    k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
+    k1 = GPy.kern.RBF(1) + GPy.kern.bias(1)
     k2 = GPy.kern.coregionalize(2,1)
     k = k1**k2
     m = GPy.models.GPRegression(X, Y, kernel=k)

@@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True):
 #     Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
 #     Y = np.vstack((Y1, Y2))
 #
-#     k1 = GPy.kern.rbf(1)
+#     k1 = GPy.kern.RBF(1)
 #     m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
 #     m.constrain_fixed('.*rbf_var', 1.)
 #     m.optimize(max_iters=100)

@@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
     Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
                    np.random.randint(0, 4, num_inducing)[:, None]))

-    k1 = GPy.kern.rbf(1)
+    k1 = GPy.kern.RBF(1)
     k2 = GPy.kern.coregionalize(output_dim=5, rank=5)
     k = k1**k2

@@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000

     data['Y'] = data['Y'] - np.mean(data['Y'])

-    lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
+    lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF)
     if plot:
         pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
         ax = pb.gca()

@@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
     optim_point_y = np.empty(2)
     np.random.seed(seed=seed)
     for i in range(0, model_restarts):
-        # kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
-        kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
+        # kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
+        kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))

         m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
         m['noise_variance'] = np.random.uniform(1e-3, 1)

@@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
         ax.set_ylim(ylim)
     return m # (models, lls)

-def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
+def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
     """
     Evaluate the GP objective function for a given data set for a range of
     signal to noise ratios and a range of lengthscales.

@@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
     optimizer='scg'
     x_len = 30
     X = np.linspace(0, 10, x_len)[:, None]
-    f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
+    f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X))
     Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]

-    kern = GPy.kern.rbf(1)
+    kern = GPy.kern.RBF(1)
     poisson_lik = GPy.likelihoods.Poisson()
     laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()

@@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
     if kernel_type == 'linear':
         kernel = GPy.kern.linear(X.shape[1], ARD=1)
     elif kernel_type == 'rbf_inv':
-        kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
+        kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
     else:
-        kernel = GPy.kern.rbf(X.shape[1], ARD=1)
-    kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
+        kernel = GPy.kern.RBF(X.shape[1], ARD=1)
+    kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1])
     m = GPy.models.GPRegression(X, Y, kernel)
     # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
     # m.set_prior('.*lengthscale',len_prior)

@@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
     if kernel_type == 'linear':
         kernel = GPy.kern.linear(X.shape[1], ARD=1)
     elif kernel_type == 'rbf_inv':
-        kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
+        kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
     else:
-        kernel = GPy.kern.rbf(X.shape[1], ARD=1)
+        kernel = GPy.kern.RBF(X.shape[1], ARD=1)
     #kernel += GPy.kern.bias(X.shape[1])
     X_variance = np.ones(X.shape) * 0.5
     m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)

@@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
     X = np.random.uniform(-3., 3., (num_samples, 1))
     Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
     # construct kernel
-    rbf = GPy.kern.rbf(1)
+    rbf = GPy.kern.RBF(1)
     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
     m.checkgrad(verbose=1)

@@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
     Y[inan] = np.nan

     # construct kernel
-    rbf = GPy.kern.rbf(2)
+    rbf = GPy.kern.RBF(2)

     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)

@@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
     # likelihood = GPy.likelihoods.Gaussian(Y)
     Z = np.random.uniform(-3., 3., (7, 1))

-    k = GPy.kern.rbf(1)
+    k = GPy.kern.RBF(1)
     # create simple GP Model - no input uncertainty on this one
-    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
+    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z)

     if optimize:
         m.optimize('scg', messages=1, max_iters=max_iters)

@@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
     print m

     # the same Model with uncertainty
-    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S)
+    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
     if optimize:
         m.optimize('scg', messages=1, max_iters=max_iters)
     if plot:

@@ -1,9 +1,33 @@
 # Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from constructors import *
-try:
-    from constructors import rbf_sympy, sympykern # these depend on sympy
-except:
-    pass
-from kern import *
+from _src.rbf import RBF
+from _src.white import White
+from _src.kern import Kern
+from _src.linear import Linear
+#import bias
+#import Brownian
+#import coregionalize
+#import exponential
+#import eq_ode1
+#import finite_dimensional
+#import fixed
+#import gibbs
+#import hetero
+#import hierarchical
+#import independent_outputs
+#import linear
+#import Matern32
+#import Matern52
+#import mlp
+#import ODE_1
+#import periodic_exponential
+#import periodic_Matern32
+#import periodic_Matern52
+#import poly
+#import prod_orthogonal
+#import prod
+#import rational_quadratic
+#import rbfcos
+#import rbf
+#import rbf_inv
+#import spline
+#import symmetric
+#import white

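With this restructuring the public kernel classes move to GPy/kern/_src and are re-exported under CamelCase names; only RBF, White, Kern and Linear are wired up at this point in the diff. A hedged sketch of the new import style:

    import numpy as np
    from GPy.kern import RBF, White, Linear

    k = RBF(2, variance=0.5, ARD=True) + White(2)
    X = np.random.randn(10, 2)
    print k.K(X).shape   # (10, 10)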
GPy/kern/_src/add.py (new file, 215 lines)
@@ -0,0 +1,215 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import sys
+import numpy as np
+import itertools
+from linear import Linear
+from ...core.parameterization import Parameterized
+from ...core.parameterization.param import Param
+from kern import Kern
+
+class Add(Kern):
+    def __init__(self, subkerns, tensor):
+        assert all([isinstance(k, Kern) for k in subkerns])
+        if tensor:
+            input_dim = sum([k.input_dim for k in subkerns])
+            self.input_slices = []
+            n = 0
+            for k in subkerns:
+                self.input_slices.append(slice(n, n+k.input_dim))
+                n += k.input_dim
+        else:
+            assert all([k.input_dim == subkerns[0].input_dim for k in subkerns])
+            input_dim = subkerns[0].input_dim
+            self.input_slices = [slice(None) for k in subkerns]
+        super(Add, self).__init__(input_dim, 'add')
+        self.add_parameters(*subkerns)
+
+    def K(self, X, X2=None):
+        """
+        Compute the kernel function.
+
+        :param X: the first set of inputs to the kernel
+        :param X2: (optional) the second set of arguments to the kernel. If X2
+                   is None, this is passed through to the 'part' object, which
+                   handles this as X2 == X.
+        """
+        assert X.shape[1] == self.input_dim
+        if X2 is None:
+            return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
+        else:
+            return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
+
+    def update_gradients_full(self, dL_dK, X):
+        [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+
+    def gradients_X(self, dL_dK, X, X2=None):
+        """Compute the gradient of the objective function with respect to X.
+
+        :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
+        :type dL_dK: np.ndarray (num_samples x num_inducing)
+        :param X: Observed data inputs
+        :type X: np.ndarray (num_samples x input_dim)
+        :param X2: Observed data inputs (optional, defaults to X)
+        :type X2: np.ndarray (num_inducing x input_dim)"""
+
+        target = np.zeros_like(X)
+        if X2 is None:
+            [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        else:
+            [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+        return target
+
+    def Kdiag(self, X):
+        assert X.shape[1] == self.input_dim
+        return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])
+
+    def psi0(self, Z, mu, S):
+        return np.sum([p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
+
+    def psi1(self, Z, mu, S):
+        return np.sum([p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
+
+    def psi2(self, Z, mu, S):
+        psi2 = np.sum([p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)], 0)
+
+        # compute the "cross" terms
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        #from bias import Bias
+        from linear import Linear
+        #from fixed import Fixed
+
+        for (p1, i1), (p2, i2) in itertools.combinations(itertools.izip(self._parameters_, self.input_slices), 2):
+            # white doesn't combine with anything
+            if isinstance(p1, White) or isinstance(p2, White):
+                pass
+            # rbf X bias
+            #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
+            elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
+                tmp = p2.psi1(Z[:,i2], mu[:,i2], S[:,i2])
+                psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
+            #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
+            elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
+                tmp = p1.psi1(Z[:,i1], mu[:,i1], S[:,i1])
+                psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
+            else:
+                raise NotImplementedError, "psi2 cannot be computed for this kernel"
+        return psi2
+
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        #from bias import Bias
+        from linear import Linear
+        #from fixed import Fixed
+
+        for p1, is1 in zip(self._parameters_, self.input_slices):
+
+            #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
+
+            p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        #from bias import Bias
+        from linear import Linear
+        #from fixed import Fixed
+
+        target = np.zeros(Z.shape)
+        for p1, is1 in zip(self._parameters_, self.input_slices):
+
+            #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
+
+            target += p1.gradients_Z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+        return target
+
+    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        from white import White
+        from rbf import RBF
+        #from rbf_inv import RBFInv
+        #from bias import Bias
+        from linear import Linear
+        #from fixed import Fixed
+
+        target_mu = np.zeros(mu.shape)
+        target_S = np.zeros(S.shape)
+        for p1, is1 in zip(self._parameters_, self.input_slices):
+
+            #compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
+            eff_dL_dpsi1 = dL_dpsi1.copy()
+            for p2, is2 in zip(self._parameters_, self.input_slices):
+                if p2 is p1:
+                    continue
+                if isinstance(p2, White):
+                    continue
+                elif isinstance(p2, Bias):
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
+                else:
+                    eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
+
+            a, b = p1.gradients_muS_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
+            target_mu += a
+            target_S += b
+        return target_mu, target_S
+
+    def plot(self, *args, **kwargs):
+        """
+        See GPy.plotting.matplot_dep.plot
+        """
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ..plotting.matplot_dep import kernel_plots
+        kernel_plots.plot(self,*args)
+
+    def _getstate(self):
+        """
+        Get the current state of the class,
+        here just all the indices, rest can get recomputed
+        """
+        return Parameterized._getstate(self) + [#self._parameters_,
+                self.input_dim,
+                self.input_slices,
+                self._param_slices_
+                ]
+
+    def _setstate(self, state):
+        self._param_slices_ = state.pop()
+        self.input_slices = state.pop()
+        self.input_dim = state.pop()
+        Parameterized._setstate(self, state)

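To make the slicing logic above concrete: with tensor=False every subkernel sees all columns of X, while tensor=True stacks the input dimensions and hands each subkernel its own slice. A sketch (Add is normally built via Kern.add or the + operator rather than directly):

    import numpy as np
    from GPy.kern import RBF, Linear

    k_shared = RBF(2) + Linear(2)                  # both parts act on both columns
    k_tensor = RBF(1).add(Linear(1), tensor=True)  # input_dim 2, one column each

    X = np.random.randn(10, 2)
    print k_shared.K(X).shape, k_tensor.K(X).shape  # (10, 10) (10, 10)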
@@ -1,12 +1,12 @@
 # Copyright (c) 2012, James Hensman and Ricardo Andrade
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from kernpart import Kernpart
+from kern import Kern
 import numpy as np
 from scipy import weave
 from ...core.parameterization import Param

-class Coregionalize(Kernpart):
+class Coregionalize(Kern):
     """
     Covariance function for intrinsic/linear coregionalization models

@@ -133,6 +133,8 @@ class Coregionalize(Kernpart):
         #dkappa = dL_dKdiag_small
         #target += np.hstack([dW.flatten(),dkappa])

-    def gradients_X(self,dL_dK,X,X2,target):
-        #NOTE In this case, pass is equivalent to returning zero.
-        pass
+    def gradients_X(self,dL_dK,X,X2):
+        if X2 is None:
+            return np.zeros((X.shape[0], X.shape[0]))
+        else:
+            return np.zeros((X.shape[0], X2.shape[0]))

GPy/kern/_src/kern.py (new file, 330 lines)
@@ -0,0 +1,330 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import sys
+import numpy as np
+import itertools
+from ...core.parameterization import Parameterized
+from ...core.parameterization.param import Param
+
+
+class Kern(Parameterized):
+    def __init__(self, input_dim, name):
+        """
+        The base class for a kernel: a positive definite function
+        which forms the core of a covariance function (kernel).
+
+        :param input_dim: the number of input dimensions to the function
+        :type input_dim: int
+
+        Do not instantiate.
+        """
+        super(Kern, self).__init__(name)
+        self.input_dim = input_dim
+
+    def K(self, X, X2, target):
+        raise NotImplementedError
+    def Kdiag(self, X, target):
+        raise NotImplementedError
+    def _param_grad_helper(self, dL_dK, X, X2, target):
+        raise NotImplementedError
+    def psi0(self, Z, mu, S, target):
+        raise NotImplementedError
+    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
+        raise NotImplementedError
+    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
+        raise NotImplementedError
+    def psi1(self, Z, mu, S, target):
+        raise NotImplementedError
+    def dpsi1_dtheta(self, Z, mu, S, target):
+        raise NotImplementedError
+    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
+        raise NotImplementedError
+    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
+        raise NotImplementedError
+    def psi2(self, Z, mu, S, target):
+        raise NotImplementedError
+    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
+        raise NotImplementedError
+    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
+        raise NotImplementedError
+    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
+        raise NotImplementedError
+    def gradients_X(self, dL_dK, X, X2, target):
+        raise NotImplementedError
+    def dKdiag_dX(self, dL_dK, X, target):
+        raise NotImplementedError
+    def update_gradients_full(self, dL_dK, X):
+        """Set the gradients of all parameters when doing full (N) inference."""
+        raise NotImplementedError
+    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
+        """Set the gradients of all parameters when doing sparse (M) inference."""
+        raise NotImplementedError
+    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
+        raise NotImplementedError
+
+    def plot_ARD(self, *args):
+        """If an ARD kernel is present, plot a bar representation using matplotlib
+
+        See GPy.plotting.matplot_dep.plot_ARD
+        """
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        from ..plotting.matplot_dep import kernel_plots
+        return kernel_plots.plot_ARD(self,*args)
+
+    def __add__(self, other):
+        """ Overloading of the '+' operator. For more control, see self.add """
+        return self.add(other)
+
+    def add(self, other, tensor=False):
+        """
+        Add another kernel to this one.
+
+        If tensor is False, both kernels are defined on the same _space_; then
+        the created kernel will have the same number of inputs as self and
+        other (which must be the same).
+
+        If tensor is True, then the dimensions are stacked 'horizontally', so
+        that the resulting kernel has self.input_dim + other.input_dim
+
+        :param other: the other kernel to be added
+        :type other: GPy.kern
+
+        """
+        assert isinstance(other, Kern), "only kernels can be added to kernels..."
+        from add import Add
+        return Add([self, other], tensor)
+
+    def __call__(self, X, X2=None):
+        return self.K(X, X2)
+
+    def __mul__(self, other):
+        """ Here we overload the '*' operator. See self.prod for more information"""
+        return self.prod(other)
+
+    def __pow__(self, other, tensor=False):
+        """
+        Shortcut for tensor `prod`.
+        """
+        return self.prod(other, tensor=True)
+
+    def prod(self, other, tensor=False):
+        """
+        Multiply two kernels (either on the same space, or on the tensor product of the input space).
+
+        :param other: the other kernel to be multiplied
+        :type other: GPy.kern
+        :param tensor: whether or not to use the tensor space (default is false).
+        :type tensor: bool
+
+        """
+        assert isinstance(other, Kern), "only kernels can be added to kernels..."
+        from prod import Prod
+        return Prod(self, other, tensor)

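The operator overloads above give three combination idioms; a brief sketch (constructors assumed from this commit):

    from GPy.kern import RBF, Linear

    k1, k2 = RBF(1), Linear(1)
    k_add = k1 + k2    # Add on the shared input space
    k_mul = k1 * k2    # Prod on the shared input space
    k_ten = k1 ** k2   # Prod on the stacked (tensor) space: input_dim 2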
+from GPy.core.model import Model
+
+class Kern_check_model(Model):
+    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgrad() to be called independently on a kernel."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Model.__init__(self, 'kernel_test_model')
+        num_samples = 20
+        num_samples2 = 10
+        if kernel==None:
+            kernel = GPy.kern.rbf(1)
+        if X==None:
+            X = np.random.randn(num_samples, kernel.input_dim)
+        if dL_dK==None:
+            if X2==None:
+                dL_dK = np.ones((X.shape[0], X.shape[0]))
+            else:
+                dL_dK = np.ones((X.shape[0], X2.shape[0]))
+
+        self.kernel = kernel
+        self.add_parameter(kernel)
+        self.X = X
+        self.X2 = X2
+        self.dL_dK = dL_dK
+
+    def is_positive_definite(self):
+        v = np.linalg.eig(self.kernel.K(self.X))[0]
+        if any(v < -10*sys.float_info.epsilon):
+            return False
+        else:
+            return True
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()
+
+    def _log_likelihood_gradients(self):
+        raise NotImplementedError, "This needs to be implemented to use the Kern_check_model class."
+
+class Kern_check_dK_dtheta(Kern_check_model):
+    """This class allows gradient checks for the gradient of a kernel with respect to parameters."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
+
+    def _log_likelihood_gradients(self):
+        return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)
+
+class Kern_check_dKdiag_dtheta(Kern_check_model):
+    """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
+    def __init__(self, kernel=None, dL_dK=None, X=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
+        if dL_dK==None:
+            self.dL_dK = np.ones((self.X.shape[0]))
+    def parameters_changed(self):
+        self.kernel.update_gradients_full(self.dL_dK, self.X)
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)
+
+class Kern_check_dK_dX(Kern_check_model):
+    """This class allows gradient checks for the gradient of a kernel with respect to X."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_model.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=X2)
+        self.remove_parameter(kernel)
+        self.X = Param('X', self.X)
+        self.add_parameter(self.X)
+    def _log_likelihood_gradients(self):
+        return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()
+
+class Kern_check_dKdiag_dX(Kern_check_dK_dX):
+    """This class allows gradient checks for the gradient of a kernel diagonal with respect to X."""
+    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
+        Kern_check_dK_dX.__init__(self, kernel=kernel, dL_dK=dL_dK, X=X, X2=None)
+        if dL_dK==None:
+            self.dL_dK = np.ones((self.X.shape[0]))
+
+    def log_likelihood(self):
+        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()
+
+    def _log_likelihood_gradients(self):
+        return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()
+
+def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
+    """
+    This function runs on kernels to check the correctness of their
+    implementation. It checks that the covariance function is positive definite
+    for a randomly generated data set.
+
+    :param kern: the kernel to be tested.
+    :type kern: GPy.kern.Kern
+    :param X: X input values to test the covariance function.
+    :type X: ndarray
+    :param X2: X2 input values to test the covariance function.
+    :type X2: ndarray
+
+    """
+    pass_checks = True
+    if X==None:
+        X = np.random.randn(10, kern.input_dim)
+        if output_ind is not None:
+            X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
+    if X2==None:
+        X2 = np.random.randn(20, kern.input_dim)
+        if output_ind is not None:
+            X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])
+
+    if verbose:
+        print("Checking covariance function is positive definite.")
+    result = Kern_check_model(kern, X=X).is_positive_definite()
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Positive definite check failed for " + kern.name + " covariance function.")
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt theta.")
+    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt theta.")
+    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X) wrt X.")
+    try:
+        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of K(X, X2) wrt X.")
+    try:
+        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    if verbose:
+        print("Checking gradients of Kdiag(X) wrt X.")
+    try:
+        result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
+    except NotImplementedError:
+        result=True
+        if verbose:
+            print("gradients_X not implemented for " + kern.name)
+    if result and verbose:
+        print("Check passed.")
+    if not result:
+        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
+        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
+        pass_checks = False
+        return False
+
+    return pass_checks

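kern_test is meant as a quick self-check when implementing a new kernel: it verifies positive definiteness and then runs checkgrad on the theta and X gradients. A hedged usage sketch (the _src import path is an assumption):

    import numpy as np
    from GPy.kern import RBF
    from GPy.kern._src.kern import kern_test

    k = RBF(3)
    X = np.random.randn(10, 3)
    assert kern_test(k, X=X, verbose=True)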
@@ -4,13 +4,14 @@

 import numpy as np
 from scipy import weave
-from kernpart import Kernpart
+from kern import Kern
 from ...util.linalg import tdot
 from ...util.misc import fast_array_equal, param_to_array
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp
+from ...util.caching import Cacher, cache_this

-class Linear(Kernpart):
+class Linear(Kern):
     """
     Linear kernel

@@ -43,35 +44,48 @@ class Linear(Kernpart):
             assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
         else:
             variances = np.ones(self.input_dim)

         self.variances = Param('variances', variances, Logexp())
+        self.variances.gradient = np.zeros(self.variances.shape)
         self.add_parameter(self.variances)
-        self.variances.add_observer(self, self.update_variance)
+        self.variances.add_observer(self, self._on_changed)

         # initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3, 1))
         self._X, self._X2 = np.empty(shape=(2, 1))

-    def update_variance(self, v):
-        self.variances2 = np.square(self.variances)
+    def _on_changed(self, obj):
+        self._notify_observers()

-    def on_input_change(self, X):
-        self._K_computations(X, None)
+    @cache_this(limit=3, reset_on_self=True)
+    def K(self, X, X2=None):
+        if self.ARD:
+            if X2 is None:
+                return tdot(X*np.sqrt(self.variances))
+            else:
+                rv = np.sqrt(self.variances)
+                return np.dot(X*rv, (X2*rv).T)
+        else:
+            return self._dot_product(X, X2) * self.variances
+
+    @cache_this(limit=3, reset_on_self=False)
+    def _dot_product(self, X, X2=None):
+        if X2 is None:
+            return tdot(X)
+        else:
+            return np.dot(X, X2.T)
+
+    def Kdiag(self, X):
+        return np.sum(self.variances * np.square(X), -1)

     def update_gradients_full(self, dL_dK, X):
-        self.variances.gradient[:] = 0
+        self.variances.gradient = np.zeros(self.variances.size)
         self._param_grad_helper(dL_dK, X, None, self.variances.gradient)

     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
         tmp = dL_dKdiag[:, None] * X ** 2
         if self.ARD:
             self.variances.gradient = tmp.sum(0)
         else:
-            self.variances.gradient = tmp.sum()
+            self.variances.gradient = np.atleast_1d(tmp.sum())
         self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)
         self._param_grad_helper(dL_dKnm, X, Z, self.variances.gradient)

     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         self._psi_computations(Z, mu, S)
         # psi0:

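The cached K above is just the weighted dot product k(x, x') = sum_i v_i * x_i * x'_i, i.e. K = X diag(v) X2'. A quick numerical check of that identity (Linear as defined in this diff; np.asarray strips the Param wrapper):

    import numpy as np
    from GPy.kern import Linear

    k = Linear(3, ARD=True)
    X, X2 = np.random.randn(5, 3), np.random.randn(4, 3)
    manual = np.dot(X * np.asarray(k.variances), X2.T)   # X diag(v) X2'
    assert np.allclose(k.K(X, X2), manual)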
@@ -85,24 +99,7 @@ class Linear(Kernpart):
         if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
         else: self.variances.gradient += tmp.sum()
         #from Kmm
-        self._K_computations(Z, None)
         self._param_grad_helper(dL_dKmm, Z, None, self.variances.gradient)

-    def K(self, X, X2, target):
-        if self.ARD:
-            XX = X * np.sqrt(self.variances)
-            if X2 is None:
-                target += tdot(XX)
-            else:
-                XX2 = X2 * np.sqrt(self.variances)
-                target += np.dot(XX, XX2.T)
-        else:
-            if X is not self._X or X2 is not None:
-                self._K_computations(X, X2)
-            target += self.variances * self._dot_product
-
-    def Kdiag(self, X, target):
-        np.add(target, np.sum(self.variances * np.square(X), -1), target)
-
     def _param_grad_helper(self, dL_dK, X, X2, target):
         if self.ARD:

|
|||
product = X[:, None, :] * X2[None, :, :]
|
||||
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
|
||||
else:
|
||||
if X is not self._X or X2 is not None:
|
||||
self._K_computations(X, X2)
|
||||
target += np.sum(self._dot_product * dL_dK)
|
||||
target += np.sum(self._dot_product(X, X2) * dL_dK)
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2, target):
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
if X2 is None:
|
||||
target += 2*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||
return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||
else:
|
||||
target += (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||
return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||
|
||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
||||
target += 2.*self.variances*dL_dKdiag[:,None]*X
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
return 2.*self.variances*dL_dKdiag[:,None]*X
|
||||
|
||||
#---------------------------------------#
|
||||
# PSI statistics #
|
||||
|
|
@@ -224,7 +219,7 @@ class Linear(Kernpart):
         weave_options = {'headers'           : ['<omp.h>'],
                          'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
                          'extra_link_args'   : ['-lgomp']}

         N, num_inducing, input_dim, mu = mu.shape[0], Z.shape[0], mu.shape[1], param_to_array(mu)
         weave.inline(code, support_code=support_code, libraries=['gomp'],
                      arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],

@@ -273,15 +268,15 @@ class Linear(Kernpart):
     #             Precomputations           #
     #---------------------------------------#

-    def _K_computations(self, X, X2):
-        if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
-            self._X = X.copy()
-            if X2 is None:
-                self._dot_product = tdot(param_to_array(X))
-                self._X2 = None
-            else:
-                self._X2 = X2.copy()
-                self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))
+    #def _K_computations(self, X, X2):
+        #if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2)):
+            #self._X = X.copy()
+            #if X2 is None:
+                ##self._dot_product = tdot(param_to_array(X))
+                #self._X2 = None
+            #else:
+                #self._X2 = X2.copy()
+                #self._dot_product = np.dot(param_to_array(X), param_to_array(X2.T))

     def _psi_computations(self, Z, mu, S):
         # here are the "statistics" for psi1 and psi2

GPy/kern/_src/prod.py (new file, 65 lines)
@ -0,0 +1,65 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kern import Kern
import numpy as np

class Prod(Kern):
    """
    Computes the product of 2 kernels

    :param k1, k2: the kernels to multiply
    :type k1, k2: Kern
    :param tensor: the kernels are either multiplied as functions defined on the same input space (default) or as functions on the product of the input spaces
    :type tensor: Boolean
    :rtype: kernel object

    """
    def __init__(self, k1, k2, tensor=False):
        if tensor:
            super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
            self.slice1 = slice(0, k1.input_dim)
            self.slice2 = slice(k1.input_dim, k1.input_dim + k2.input_dim)
        else:
            assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
            super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
            self.slice1 = slice(0, self.input_dim)
            self.slice2 = slice(0, self.input_dim)
        self.k1 = k1
        self.k2 = k2
        self.add_parameters(self.k1, self.k2)

    def K(self, X, X2=None):
        if X2 is None:
            return self.k1.K(X[:,self.slice1], None) * self.k2.K(X[:,self.slice2], None)
        else:
            return self.k1.K(X[:,self.slice1], X2[:,self.slice1]) * self.k2.K(X[:,self.slice2], X2[:,self.slice2])

    def Kdiag(self, X):
        return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2])

    def update_gradients_full(self, dL_dK, X):
        self.k1.update_gradients_full(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1])
        self.k2.update_gradients_full(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2])

    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
        self.k1.update_gradients_sparse(dL_dKmm * self.k2.K(Z[:,self.slice2]), dL_dKnm * self.k2(X[:,self.slice2], Z[:,self.slice2]), dL_dKdiag * self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1], Z[:,self.slice1])
        self.k2.update_gradients_sparse(dL_dKmm * self.k1.K(Z[:,self.slice1]), dL_dKnm * self.k1(X[:,self.slice1], Z[:,self.slice1]), dL_dKdiag * self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2], Z[:,self.slice2])

    def gradients_X(self, dL_dK, X, X2=None):
        target = np.zeros(X.shape)
        if X2 is None:
            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2]), X[:,self.slice1], None)
            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1]), X[:,self.slice2], None)
        else:
            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self.k2(X[:,self.slice2], X2[:,self.slice2]), X[:,self.slice1], X2[:,self.slice1])
            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self.k1(X[:,self.slice1], X2[:,self.slice1]), X[:,self.slice2], X2[:,self.slice2])
        return target

    def gradients_X_diag(self, dL_dKdiag, X):
        target = np.zeros(X.shape)
        target[:,self.slice1] += self.k1.gradients_X(dL_dKdiag*self.k2.Kdiag(X[:,self.slice2]), X[:,self.slice1])
        target[:,self.slice2] += self.k2.gradients_X(dL_dKdiag*self.k1.Kdiag(X[:,self.slice1]), X[:,self.slice2])
        return target
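For orientation, a minimal usage sketch of the new Prod kernel; this assumes the class is exposed as GPy.kern.Prod and that RBF is available under the new name used elsewhere in this diff (names may differ slightly at this commit):

    import numpy as np
    import GPy

    k1 = GPy.kern.RBF(2)
    k2 = GPy.kern.RBF(2)
    k_prod = GPy.kern.Prod(k1, k2)   # product on the shared 2-d input space
    # Prod(k1, k2, tensor=True) would instead act on the stacked 4-d input space

    X = np.random.randn(5, 2)
    # on a shared space, the product kernel is the elementwise product of the Gram matrices
    assert np.allclose(k_prod.K(X), k1.K(X) * k2.K(X))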
GPy/kern/_src/rbf.py
@@ -4,13 +4,13 @@

 import numpy as np
 from scipy import weave
-from kernpart import Kernpart
+from kern import Kern
 from ...util.linalg import tdot
 from ...util.misc import fast_array_equal, param_to_array
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp

-class RBF(Kernpart):
+class RBF(Kern):
     """
     Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
@@ -52,30 +52,16 @@ class RBF(Kernpart):
            lengthscale = np.ones(self.input_dim)

        self.variance = Param('variance', variance, Logexp())

        self.lengthscale = Param('lengthscale', lengthscale, Logexp())
        self.lengthscale.add_observer(self, self.update_lengthscale)
        self.update_lengthscale(self.lengthscale)

        self.add_parameters(self.variance, self.lengthscale)
        self.parameters_changed() # initializes cache

        #self.update_inv_lengthscale(self.lengthscale)
        #self.parameters_changed()
        # initialize cache
        #self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        #self._X, self._X2, self._params_save = np.empty(shape=(3, 1))

        # a set of optional args to pass to weave
        # self.weave_options = {'headers' : ['<omp.h>'],
        #                       'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
        #                       'extra_link_args' : ['-lgomp']}
        self.weave_options = {}

    def on_input_change(self, X):
        #self._K_computations(X, None)
        pass

    def update_lengthscale(self, l):
        self.lengthscale2 = np.square(self.lengthscale)
@@ -84,23 +70,27 @@ class RBF(Kernpart):
         self._X, self._X2 = np.empty(shape=(2, 1))
         self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S

-    def K(self, X, X2, target):
+    def K(self, X, X2=None):
         self._K_computations(X, X2)
-        target += self.variance * self._K_dvar
+        return self.variance * self._K_dvar

-    def Kdiag(self, X, target):
-        np.add(target, self.variance, target)
+    def Kdiag(self, X):
+        ret = np.ones(X.shape[0])
+        ret[:] = self.variance
+        return ret

-    def psi0(self, Z, mu, S, target):
-        target += self.variance
+    def psi0(self, Z, mu, S):
+        ret = np.empty(mu.shape[0], dtype=np.float64)
+        ret[:] = self.variance
+        return ret

-    def psi1(self, Z, mu, S, target):
+    def psi1(self, Z, mu, S):
         self._psi_computations(Z, mu, S)
-        target += self._psi1
+        return self._psi1

-    def psi2(self, Z, mu, S, target):
+    def psi2(self, Z, mu, S):
         self._psi_computations(Z, mu, S)
-        target += self._psi2
+        return self._psi2

     def update_gradients_full(self, dL_dK, X):
         self._K_computations(X, None)
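The pattern above recurs throughout this commit: kernel methods stop accumulating into a caller-provided target array and instead allocate and return their results. A standalone sketch of the two conventions side by side (hypothetical toy classes, not GPy code):

    import numpy as np

    class ToyOld(object):
        """Legacy Kernpart convention: accumulate into a caller-owned buffer."""
        def K(self, X, X2, target):
            N2 = X.shape[0] if X2 is None else X2.shape[0]
            target += np.ones((X.shape[0], N2))

    class ToyNew(object):
        """New Kern convention: allocate and return the result."""
        def K(self, X, X2=None):
            if X2 is None:
                X2 = X
            return np.ones((X.shape[0], X2.shape[0]))

    X = np.random.randn(4, 1)
    buf = np.zeros((4, 4))
    ToyOld().K(X, None, buf)   # caller allocates; additive parts accumulate
    K = ToyNew().K(X)          # method allocates and returns
    assert np.allclose(buf, K)

The old style let additive kernels share one buffer cheaply; the new style makes each kernel self-contained at the cost of an allocation per call.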
@@ -165,7 +155,38 @@ class RBF(Kernpart):
         else:
             self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)

-    def gradients_X(self, dL_dK, X, X2, target):
+    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        self._psi_computations(Z, mu, S)
+
+        # psi1
+        denominator = (self.lengthscale2 * (self._psi1_denom))
+        dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
+        grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
+
+        # psi2
+        term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
+        term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
+        dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
+        grad += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
+
+        return grad
+
+    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
+        self._psi_computations(Z, mu, S)
+        # psi1
+        tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
+        grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
+        grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
+
+        tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
+        grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
+        grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
+
+        return grad_mu, grad_S
+
+    def gradients_X(self, dL_dK, X, X2=None):
         #if self._X is None or X.base is not self._X.base or X2 is not None:
         self._K_computations(X, X2)
         if X2 is None:
@@ -173,45 +194,16 @@ class RBF(Kernpart):
         else:
             _K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
             gradients_X = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
-            target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
+            return np.sum(gradients_X * dL_dK.T[:, :, None], 0)

-    def dKdiag_dX(self, dL_dKdiag, X, target):
-        pass
+    def dKdiag_dX(self, dL_dKdiag, X):
+        return np.zeros(X.shape[0])

-    #---------------------------------------#
-    #             PSI statistics            #
-    #---------------------------------------#
-
-    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
-        pass
-
-    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        denominator = (self.lengthscale2 * (self._psi1_denom))
-        dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
-        target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
-
-    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
-        self._psi_computations(Z, mu, S)
-        tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
-        target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
-        target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
-
-    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
-        self._psi_computations(Z, mu, S)
-        term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
-        term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
-        dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
-        target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
-
-    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
-        """Think N,num_inducing,num_inducing,input_dim """
-        self._psi_computations(Z, mu, S)
-        tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
-        target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
-        target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
-
-    #---------------------------------------#
     #---------------------------------------#
     #            Precomputations            #
     #---------------------------------------#
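Hand-derived gradients like the ones above are easy to get wrong, which is why the commit also carries gradient-checking machinery (see the Kern_check classes further down). A generic central-difference checker is a useful standalone companion (a sketch, not GPy API):

    import numpy as np

    def check_grad(f, grad, X, eps=1e-6, tol=1e-4):
        """Compare an analytic gradient of a scalar function f against central differences."""
        g_analytic = grad(X)
        g_numeric = np.zeros_like(X)
        it = np.nditer(X, flags=['multi_index'])
        while not it.finished:
            idx = it.multi_index
            Xp, Xm = X.copy(), X.copy()
            Xp[idx] += eps
            Xm[idx] -= eps
            g_numeric[idx] = (f(Xp) - f(Xm)) / (2.0 * eps)
            it.iternext()
        return np.allclose(g_analytic, g_numeric, atol=tol)

One would pair it, for instance, with f(X) = (dL_dK * k.K(X)).sum() against k.gradients_X(dL_dK, X), assuming gradients_X returns the total derivative of that objective.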
@@ -373,6 +365,7 @@ class RBF(Kernpart):
         #include <omp.h>
         #include <math.h>
         """
+        mu = param_to_array(mu)
         weave.inline(code, support_code=support_code, libraries=['gomp'],
                      arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
                      type_converters=weave.converters.blitz, **self.weave_options)
GPy/kern/_src/white.py
@@ -1,12 +1,12 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from kernpart import Kernpart
+from kern import Kern
 import numpy as np
 from ...core.parameterization import Param
 from ...core.parameterization.transformations import Logexp

-class White(Kernpart):
+class White(Kern):
     """
     White noise kernel.
@@ -20,14 +20,17 @@ class White(Kernpart):
         self.input_dim = input_dim
         self.variance = Param('variance', variance, Logexp())
         self.add_parameters(self.variance)
         self._psi1 = 0 # TODO: more elegance here

-    def K(self,X,X2,target):
+    def K(self, X, X2=None):
         if X2 is None:
-            target += np.eye(X.shape[0])*self.variance
+            return np.eye(X.shape[0])*self.variance
         else:
             return np.zeros((X.shape[0], X2.shape[0]))

-    def Kdiag(self,X,target):
-        target += self.variance
+    def Kdiag(self,X):
+        ret = np.ones(X.shape[0])
+        ret[:] = self.variance
+        return ret

     def update_gradients_full(self, dL_dK, X):
         self.variance.gradient = np.trace(dL_dK)
@@ -38,14 +41,8 @@ class White(Kernpart):
     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         raise NotImplementedError

-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        target += np.sum(dL_dKdiag)
-
-    def gradients_X(self,dL_dK,X,X2,target):
-        pass
-
-    def dKdiag_dX(self,dL_dKdiag,X,target):
-        pass
+    def gradients_X(self,dL_dK,X,X2):
+        return np.zeros_like(X)

-    def psi0(self,Z,mu,S,target):
-        pass # target += self.variance
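Two small observations on White. First, because dK/dvariance is the identity on K(X, X), the variance gradient in update_gradients_full is simply trace(dL_dK), as set above. Second, the kernel's semantics after the change are easy to restate numerically (a standalone sketch, not the GPy test suite):

    import numpy as np

    variance = 0.5
    X = np.random.randn(4, 1)
    X2 = np.random.randn(3, 1)

    K = np.eye(X.shape[0]) * variance               # what White.K(X) returns
    K_cross = np.zeros((X.shape[0], X2.shape[0]))   # what White.K(X, X2) returns

    assert np.allclose(np.diag(K), variance)
    assert not K_cross.any()                        # white noise never correlates distinct inputs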
GPy/kern/kern.py (deleted, 680 lines)
@@ -1,680 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import sys
import numpy as np
import itertools
from parts.prod import Prod as prod
from parts.linear import Linear
from parts.kernpart import Kernpart
from ..core.parameterization import Parameterized
from GPy.core.parameterization.param import Param

class kern(Parameterized):
    def __init__(self, input_dim, parts=[], input_slices=None):
        """
        This is the main kernel class for GPy. It handles multiple
        (additive) kernel functions, and keeps track of various things
        like which parameters live where.

        The technical code for kernels is divided into _parts_ (see
        e.g. rbf.py). This object contains a list of parts, which are
        computed additively. For multiplication, special _prod_ parts
        are used.

        :param input_dim: The dimensionality of the kernel's input space
        :type input_dim: int
        :param parts: the 'parts' (PD functions) of the kernel
        :type parts: list of Kernpart objects
        :param input_slices: the slices on the inputs which apply to each kernel
        :type input_slices: list of slice objects, or list of bools

        """
        super(kern, self).__init__('kern')
        self.add_parameters(*parts)
        self.input_dim = input_dim

        if input_slices is None:
            self.input_slices = [slice(None) for p in self._parameters_]
        else:
            assert len(input_slices) == len(self._parameters_)
            self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]

        for p in self._parameters_:
            assert isinstance(p, Kernpart), "bad kernel part"

    def parameters_changed(self):
        [p.parameters_changed() for p in self._parameters_]

    def connect_input(self, Xparam):
        [p.connect_input(Xparam) for p in self._parameters_]

    def _getstate(self):
        """
        Get the current state of the class,
        here just all the indices, the rest can get recomputed
        """
        return Parameterized._getstate(self) + [#self._parameters_,
                #self.num_params,
                self.input_dim,
                self.input_slices,
                self._param_slices_
                ]

    def _setstate(self, state):
        self._param_slices_ = state.pop()
        self.input_slices = state.pop()
        self.input_dim = state.pop()
        #self.num_params = state.pop()
        #self._parameters_ = state.pop()
        Parameterized._setstate(self, state)


    def plot_ARD(self, *args):
        """If an ARD kernel is present, plot a bar representation using matplotlib

        See GPy.plotting.matplot_dep.plot_ARD
        """
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import kernel_plots
        return kernel_plots.plot_ARD(self,*args)
    # def _transform_gradients(self, g):
    #     """
    #     Apply the transformations of the kernel so that the returned vector
    #     represents the gradient in the transformed space (i.e. that given by
    #     get_params_transformed())
    #
    #     :param g: the gradient vector for the current model, usually created by _param_grad_helper
    #     """
    #     x = self._get_params()
    #     [np.place(g, index, g[index] * constraint.gradfactor(x[index]))
    #         for constraint, index in self.constraints.iteritems() if constraint is not __fixed__]
    #     # for constraint, index in self.constraints.iteritems():
    #     #     if constraint != __fixed__:
    #     #         g[index] = g[index] * constraint.gradfactor(x[index])
    #     #[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
    #     [np.put(g, i, v) for i, v in [[i, t.sum()] for p in self._parameters_ for t,i in p._tied_to_me_.iteritems()]]
    #     # if len(self.tied_indices) or len(self.fixed_indices):
    #     #     to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
    #     #     return np.delete(g, to_remove)
    #     # else:
    #     if self._fixes_ is not None: return g[self._fixes_]
    #     return g
    #     x = self._get_params()
    #     [np.put(x, i, x * t.gradfactor(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
    #     [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
    #     if len(self.tied_indices) or len(self.fixed_indices):
    #         to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
    #         return np.delete(g, to_remove)
    #     else:
    #         return g
    def __add__(self, other):
        """ Overloading of the '+' operator. For more control, see self.add """
        return self.add(other)

    def add(self, other, tensor=False):
        """
        Add another kernel to this one.

        If tensor is False, both kernels are defined on the same _space_: then
        the created kernel will have the same number of inputs as self and
        other (which must be the same).

        If tensor is True, then the dimensions are stacked 'horizontally', so
        that the resulting kernel has self.input_dim + other.input_dim

        :param other: the other kernel to be added
        :type other: GPy.kern

        """
        if tensor:
            D = self.input_dim + other.input_dim
            self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices]
            other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices]
            other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices]

            newkern = kern(D, self._parameters_ + other._parameters_, self_input_slices + other_input_slices)

            # transfer constraints:
            # newkern.constrained_indices = self.constrained_indices + [x + self.num_params for x in other.constrained_indices]
            # newkern.constraints = self.constraints + other.constraints
            # newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
            # newkern.fixed_values = self.fixed_values + other.fixed_values
            # newkern.constraints = self.constraints + other.constraints
            # newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]
        else:
            assert self.input_dim == other.input_dim
            newkern = kern(self.input_dim, self._parameters_ + other._parameters_, self.input_slices + other.input_slices)
            # transfer constraints:
            # newkern.constrained_indices = self.constrained_indices + [i + self.num_params for i in other.constrained_indices]
            # newkern.constraints = self.constraints + other.constraints
            # newkern.fixed_indices = self.fixed_indices + [self.num_params + x for x in other.fixed_indices]
            # newkern.fixed_values = self.fixed_values + other.fixed_values
            # newkern.tied_indices = self.tied_indices + [self.num_params + x for x in other.tied_indices]

        [newkern.constraints.add(transform, ind) for transform, ind in self.constraints.iteritems()]
        [newkern.constraints.add(transform, ind+self.size) for transform, ind in other.constraints.iteritems()]
        newkern._fixes_ = ((self._fixes_ or 0) + (other._fixes_ or 0)) or None

        return newkern

    def __call__(self, X, X2=None):
        return self.K(X, X2)

    def __mul__(self, other):
        """ Here we overload the '*' operator. See self.prod for more information"""
        return self.prod(other)

    def __pow__(self, other, tensor=False):
        """
        Shortcut for tensor `prod`.
        """
        return self.prod(other, tensor=True)

    def prod(self, other, tensor=False):
        """
        Multiply two kernels (either on the same space, or on the tensor product of the input space).

        :param other: the other kernel to be multiplied
        :type other: GPy.kern
        :param tensor: whether or not to use the tensor space (default is false).
        :type tensor: bool

        """
        K1 = self
        K2 = other
        #K1 = self.copy()
        #K2 = other.copy()

        slices = []
        for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
            s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim
            s1[sl1], s2[sl2] = [True], [True]
            slices += [s1 + s2]

        newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1._parameters_, K2._parameters_)]

        if tensor:
            newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices)
        else:
            newkern = kern(K1.input_dim, newkernparts, slices)

        #newkern._follow_constrains(K1, K2)
        return newkern
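Taken together, these operators gave the legacy class its combination semantics. A hypothetical usage sketch against the pre-merge API (legacy lowercase constructors, illustrative only):

    import GPy   # as it stood before this commit

    k1 = GPy.kern.rbf(1)
    k2 = GPy.kern.white(1)

    k_add = k1 + k2                      # additive kernel on the same 1-d space
    k_add_t = k1.add(k2, tensor=True)    # additive over stacked inputs, input_dim = 2
    k_mul = k1 * k2                      # product kernel, built from prod parts
    k_mul_t = k1 ** k2                   # __pow__ shortcut for the tensor-space product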
    # def _follow_constrains(self, K1, K2):
    #
    #     # Build the array that allows to go from the initial indices of the param to the new ones
    #     K1_param = []
    #     n = 0
    #     for k1 in K1.parts:
    #         K1_param += [range(n, n + k1.num_params)]
    #         n += k1.num_params
    #     n = 0
    #     K2_param = []
    #     for k2 in K2.parts:
    #         K2_param += [range(K1.num_params + n, K1.num_params + n + k2.num_params)]
    #         n += k2.num_params
    #     index_param = []
    #     for p1 in K1_param:
    #         for p2 in K2_param:
    #             index_param += p1 + p2
    #     index_param = np.array(index_param)
    #
    #     # Get the ties and constrains of the kernels before the multiplication
    #     prev_ties = K1.tied_indices + [arr + K1.num_params for arr in K2.tied_indices]
    #
    #     prev_constr_ind = [K1.constrained_indices] + [K1.num_params + i for i in K2.constrained_indices]
    #     prev_constr = K1.constraints + K2.constraints
    #
    #     # prev_constr_fix = K1.fixed_indices + [arr + K1.num_params for arr in K2.fixed_indices]
    #     # prev_constr_fix_values = K1.fixed_values + K2.fixed_values
    #
    #     # follow the previous ties
    #     for arr in prev_ties:
    #         for j in arr:
    #             index_param[np.where(index_param == j)[0]] = arr[0]
    #
    #     # ties and constrains
    #     for i in range(K1.num_params + K2.num_params):
    #         index = np.where(index_param == i)[0]
    #         if index.size > 1:
    #             self.tie_params(index)
    #     for i, t in zip(prev_constr_ind, prev_constr):
    #         self.constrain(np.where(index_param == i)[0], t)
    #
    # def _get_params(self):
    #     return np.hstack(self._parameters_)
    #     return np.hstack([p._get_params() for p in self._parameters_])

    # def _set_params(self, x):
    #     import ipdb;ipdb.set_trace()
    #     [p._set_params(x[s]) for p, s in zip(self._parameters_, self._param_slices_)]

    # def _get_param_names(self):
    #     # this is a bit nasty: we want to distinguish between parts with the same name by appending a count
    #     part_names = np.array([k.name for k in self._parameters_], dtype=np.str)
    #     counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
    #     cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
    #     names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
    #
    #     return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self._parameters_)], [])
    def K(self, X, X2=None, which_parts='all'):
        """
        Compute the kernel function.

        :param X: the first set of inputs to the kernel
        :param X2: (optional) the second set of arguments to the kernel. If X2
                   is None, this is passed through to the 'part' object, which
                   handles this as X2 == X.
        :param which_parts: a list of booleans detailing whether to include
                   each of the part functions. By default, 'all'
                   indicates all parts
        """
        if which_parts == 'all':
            which_parts = [True] * self.size
        assert X.shape[1] == self.input_dim
        if X2 is None:
            target = np.zeros((X.shape[0], X.shape[0]))
            [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
        else:
            target = np.zeros((X.shape[0], X2.shape[0]))
            [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
        return target

    def update_gradients_full(self, dL_dK, X):
        [p.update_gradients_full(dL_dK, X) for p in self._parameters_]

    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]

    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
        [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]

    def _param_grad_helper(self, dL_dK, X, X2=None):
        """
        Compute the gradient of the covariance function with respect to the parameters.

        :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
        :type dL_dK: np.ndarray (num_samples x num_inducing)
        :param X: Observed data inputs
        :type X: np.ndarray (num_samples x input_dim)
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)

        returns: dL_dtheta
        """
        assert X.shape[1] == self.input_dim
        target = np.zeros(self.size)
        if X2 is None:
            [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
        else:
            [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]

        return self._transform_gradients(target)

    def gradients_X(self, dL_dK, X, X2=None):
        """Compute the gradient of the objective function with respect to X.

        :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
        :type dL_dK: np.ndarray (num_samples x num_inducing)
        :param X: Observed data inputs
        :type X: np.ndarray (num_samples x input_dim)
        :param X2: Observed data inputs (optional, defaults to X)
        :type X2: np.ndarray (num_inducing x input_dim)"""

        target = np.zeros_like(X)
        if X2 is None:
            [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
        else:
            [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
        return target

    def Kdiag(self, X, which_parts='all'):
        """Compute the diagonal of the covariance function for inputs X."""
        if which_parts == 'all':
            which_parts = [True] * self.size
        assert X.shape[1] == self.input_dim
        target = np.zeros(X.shape[0])
        [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
        return target

    def dKdiag_dtheta(self, dL_dKdiag, X):
        """Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
        assert X.shape[1] == self.input_dim
        assert dL_dKdiag.size == X.shape[0]
        target = np.zeros(self.size)
        [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
        return self._transform_gradients(target)

    def dKdiag_dX(self, dL_dKdiag, X):
        assert X.shape[1] == self.input_dim
        target = np.zeros_like(X)
        [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
        return target
    def psi0(self, Z, mu, S):
        target = np.zeros(mu.shape[0])
        [p.psi0(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
        return target

    def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
        target = np.zeros(self.size)
        [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
        return self._transform_gradients(target)

    def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
        target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
        [p.dpsi0_dmuS(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
        return target_mu, target_S

    def psi1(self, Z, mu, S):
        target = np.zeros((mu.shape[0], Z.shape[0]))
        [p.psi1(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]
        return target

    def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
        target = np.zeros((self.size))
        [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self._parameters_, self._param_slices_, self.input_slices)]
        return self._transform_gradients(target)

    def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
        target = np.zeros_like(Z)
        [p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
        return target

    def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
        """return shapes are num_samples,num_inducing,input_dim"""
        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
        [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
        return target_mu, target_S

    def psi2(self, Z, mu, S):
        """
        Compute the psi2 statistics for the covariance function.

        :param Z: np.ndarray of inducing inputs (num_inducing x input_dim)
        :param mu, S: np.ndarrays of means and variances (each num_samples x input_dim)
        :returns psi2: np.ndarray (num_samples,num_inducing,num_inducing)

        """
        target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
        [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self._parameters_, self.input_slices)]

        # compute the "cross" terms
        # TODO: input_slices needed
        crossterms = 0

        for [p1, i_s1], [p2, i_s2] in itertools.combinations(zip(self._parameters_, self.input_slices), 2):
            if i_s1 == i_s2:
                # TODO psi1 this must be faster/better/precached/more nice
                tmp1 = np.zeros((mu.shape[0], Z.shape[0]))
                p1.psi1(Z[:, i_s1], mu[:, i_s1], S[:, i_s1], tmp1)
                tmp2 = np.zeros((mu.shape[0], Z.shape[0]))
                p2.psi1(Z[:, i_s2], mu[:, i_s2], S[:, i_s2], tmp2)

                prod = np.multiply(tmp1, tmp2)
                crossterms += prod[:, :, None] + prod[:, None, :]

        target += crossterms
        return target
    def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
        """Gradient of the psi2 statistics with respect to the parameters."""
        target = np.zeros(self.size)
        [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]

        # compute the "cross" terms
        # TODO: better looping, input_slices
        for i1, i2 in itertools.permutations(range(len(self._parameters_)), 2):
            p1, p2 = self._parameters_[i1], self._parameters_[i2]
            # ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
            ps1, ps2 = self._param_slices_[i1], self._param_slices_[i2]

            tmp = np.zeros((mu.shape[0], Z.shape[0]))
            p1.psi1(Z, mu, S, tmp)
            p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2])

        return self._transform_gradients(target)

    def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
        target = np.zeros_like(Z)
        [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
        # target *= 2

        # compute the "cross" terms
        # TODO: we need input_slices here.
        for p1, p2 in itertools.permutations(self._parameters_, 2):
            # if p1.name == 'linear' and p2.name == 'linear':
            #     raise NotImplementedError("We don't handle linear/linear cross-terms")
            tmp = np.zeros((mu.shape[0], Z.shape[0]))
            p1.psi1(Z, mu, S, tmp)
            p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1), Z, mu, S, target)

        return target * 2

    def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
        target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
        [p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

        # compute the "cross" terms
        # TODO: we need input_slices here.
        for p1, p2 in itertools.permutations(self._parameters_, 2):
            # if p1.name == 'linear' and p2.name == 'linear':
            #     raise NotImplementedError("We don't handle linear/linear cross-terms")
            tmp = np.zeros((mu.shape[0], Z.shape[0]))
            p1.psi1(Z, mu, S, tmp)
            p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target_mu, target_S)

        return target_mu, target_S

    def plot(self, *args, **kwargs):
        """
        See GPy.plotting.matplot_dep.plot
        """
        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
        from ..plotting.matplot_dep import kernel_plots
        kernel_plots.plot(self,*args)
from GPy.core.model import Model

class Kern_check_model(Model):
    """This is a dummy model class used as a base class for checking that the gradients of a given kernel are implemented correctly. It enables checkgrad() to be called independently on a kernel."""
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Model.__init__(self, 'kernel_test_model')
        num_samples = 20
        num_samples2 = 10
        if kernel==None:
            kernel = GPy.kern.rbf(1)
        if X==None:
            X = np.random.randn(num_samples, kernel.input_dim)
        if dL_dK==None:
            if X2==None:
                dL_dK = np.ones((X.shape[0], X.shape[0]))
            else:
                dL_dK = np.ones((X.shape[0], X2.shape[0]))

        self.kernel=kernel
        self.add_parameter(kernel)
        self.X = X
        self.X2 = X2
        self.dL_dK = dL_dK

    def is_positive_definite(self):
        v = np.linalg.eig(self.kernel.K(self.X))[0]
        if any(v<-10*sys.float_info.epsilon):
            return False
        else:
            return True

    def log_likelihood(self):
        return (self.dL_dK*self.kernel.K(self.X, self.X2)).sum()

    def _log_likelihood_gradients(self):
        raise NotImplementedError, "This needs to be implemented to use the kern_check_model class."

class Kern_check_dK_dtheta(Kern_check_model):
    """This class allows gradient checks for the gradient of a kernel with respect to parameters. """
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)

    def _log_likelihood_gradients(self):
        return self.kernel._param_grad_helper(self.dL_dK, self.X, self.X2)

class Kern_check_dKdiag_dtheta(Kern_check_model):
    """This class allows gradient checks of the gradient of the diagonal of a kernel with respect to the parameters."""
    def __init__(self, kernel=None, dL_dK=None, X=None):
        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
        if dL_dK==None:
            self.dL_dK = np.ones((self.X.shape[0]))

    def parameters_changed(self):
        self.kernel.update_gradients_full(self.dL_dK, self.X)

    def log_likelihood(self):
        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()

    def _log_likelihood_gradients(self):
        return self.kernel.dKdiag_dtheta(self.dL_dK, self.X)

class Kern_check_dK_dX(Kern_check_model):
    """This class allows gradient checks for the gradient of a kernel with respect to X. """
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_model.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=X2)
        self.remove_parameter(kernel)
        self.X = Param('X', self.X)
        self.add_parameter(self.X)

    def _log_likelihood_gradients(self):
        return self.kernel.gradients_X(self.dL_dK, self.X, self.X2).flatten()

class Kern_check_dKdiag_dX(Kern_check_dK_dX):
    """This class allows gradient checks for the gradient of a kernel diagonal with respect to X. """
    def __init__(self, kernel=None, dL_dK=None, X=None, X2=None):
        Kern_check_dK_dX.__init__(self,kernel=kernel,dL_dK=dL_dK, X=X, X2=None)
        if dL_dK==None:
            self.dL_dK = np.ones((self.X.shape[0]))

    def log_likelihood(self):
        return (self.dL_dK*self.kernel.Kdiag(self.X)).sum()

    def _log_likelihood_gradients(self):
        return self.kernel.dKdiag_dX(self.dL_dK, self.X).flatten()

def kern_test(kern, X=None, X2=None, output_ind=None, verbose=False):
    """
    This function runs on kernels to check the correctness of their
    implementation. It checks that the covariance function is positive definite
    for a randomly generated data set.

    :param kern: the kernel to be tested.
    :type kern: GPy.kern.Kernpart
    :param X: X input values to test the covariance function.
    :type X: ndarray
    :param X2: X2 input values to test the covariance function.
    :type X2: ndarray

    """
    pass_checks = True
    if X==None:
        X = np.random.randn(10, kern.input_dim)
        if output_ind is not None:
            X[:, output_ind] = np.random.randint(kern.output_dim, X.shape[0])
    if X2==None:
        X2 = np.random.randn(20, kern.input_dim)
        if output_ind is not None:
            X2[:, output_ind] = np.random.randint(kern.output_dim, X2.shape[0])

    if verbose:
        print("Checking covariance function is positive definite.")
    result = Kern_check_model(kern, X=X).is_positive_definite()
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Positive definite check failed for " + kern.name + " covariance function.")
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X) wrt theta.")
    result = Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dtheta(kern, X=X, X2=None).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X2) wrt theta.")
    result = Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X2) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dtheta(kern, X=X, X2=X2).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of Kdiag(X) wrt theta.")
    result = Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=verbose)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of Kdiag(X) wrt theta failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dKdiag_dtheta(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X) wrt X.")
    try:
        result = Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("gradients_X not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dX(kern, X=X, X2=None).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of K(X, X2) wrt X.")
    try:
        result = Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("gradients_X not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of K(X, X2) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dK_dX(kern, X=X, X2=X2).checkgrad(verbose=True)
        pass_checks = False
        return False

    if verbose:
        print("Checking gradients of Kdiag(X) wrt X.")
    try:
        result = Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=verbose)
    except NotImplementedError:
        result=True
        if verbose:
            print("gradients_X not implemented for " + kern.name)
    if result and verbose:
        print("Check passed.")
    if not result:
        print("Gradient of Kdiag(X) wrt X failed for " + kern.name + " covariance function. Gradient values as follows:")
        Kern_check_dKdiag_dX(kern, X=X).checkgrad(verbose=True)
        pass_checks = False
        return False

    return pass_checks
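A hypothetical invocation of this (now-removed) legacy test harness, as it would have looked before this commit; the import path and lowercase constructors are assumptions based on the deleted module above:

    import GPy
    from GPy.kern.kern import kern_test   # location in the deleted module

    k = GPy.kern.rbf(2) + GPy.kern.white(2)
    assert kern_test(k, verbose=True)     # positive-definiteness plus all gradient checks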
GPy/kern/parts/__init__.py (deleted, 29 lines)
@@ -1,29 +0,0 @@
import bias
import Brownian
import coregionalize
import exponential
import eq_ode1
import finite_dimensional
import fixed
import gibbs
import hetero
import hierarchical
import independent_outputs
import linear
import Matern32
import Matern52
import mlp
import ODE_1
import periodic_exponential
import periodic_Matern32
import periodic_Matern52
import poly
import prod_orthogonal
import prod
import rational_quadratic
import rbfcos
import rbf
import rbf_inv
import spline
import symmetric
import white
GPy/kern/parts/kernpart.py (deleted, 176 lines)
@@ -1,176 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#from ...core.parameterized.Parameterized import set_as_parameter
from ...core.parameterization import Parameterized

class Kernpart(Parameterized):
    def __init__(self,input_dim,name):
        """
        The base class for a kernpart: a positive definite function
        which forms part of a covariance function (kernel).

        :param input_dim: the number of input dimensions to the function
        :type input_dim: int

        Do not instantiate.
        """
        super(Kernpart, self).__init__(name)
        # the input dimensionality for the covariance
        self.input_dim = input_dim
        # the number of optimisable parameters
        # the name of the covariance function.
        # link to parameterized objects
        #self._X = None

    def connect_input(self, X):
        X.add_observer(self, self.on_input_change)
        #self._X = X

    def on_input_change(self, X):
        """
        During optimization this function will be called when
        the inputs X changed. Use this to update caches dependent
        on the inputs X.
        """
        # overwrite this to update kernel when inputs X change
        pass


    # def set_as_parameter_named(self, name, gradient, index=None, *args, **kwargs):
    #     """
    #     :param names: name of parameter to set as parameter
    #     :param gradient: gradient method to get the gradient of this parameter
    #     :param index: index of where to place parameter in printing
    #     :param args, kwargs: additional arguments to gradient
    #
    #     Convenience method to connect Kernpart parameters:
    #     parameter with name (attribute of this Kernpart) will be set as parameter with following name:
    #
    #         kernel_name + _ + parameter_name
    #
    #     To add the kernels name to the parameter name use this method to
    #     add parameters.
    #     """
    #     self.set_as_parameter(name, getattr(self, name), gradient, index, *args, **kwargs)
    # def set_as_parameter(self, name, array, gradient, index=None, *args, **kwargs):
    #     """
    #     See :py:func:`GPy.core.parameterized.Parameterized.set_as_parameter`
    #
    #     Note: this method adds the kernels name in front of the parameter.
    #     """
    #     p = Param(self.name+"_"+name, array, gradient, *args, **kwargs)
    #     if index is None:
    #         self._parameters_.append(p)
    #     else:
    #         self._parameters_.insert(index, p)
    #     self.__dict__[name] = p
    #set_as_parameter.__doc__ += set_as_parameter.__doc__ # @UndefinedVariable
    # def _get_params(self):
    #     raise NotImplementedError
    # def _set_params(self,x):
    #     raise NotImplementedError
    # def _get_param_names(self):
    #     raise NotImplementedError

    def K(self,X,X2,target):
        raise NotImplementedError
    def Kdiag(self,X,target):
        raise NotImplementedError
    def _param_grad_helper(self,dL_dK,X,X2,target):
        raise NotImplementedError
    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        # In the base case compute this by calling _param_grad_helper. Need to
        # override for stationary covariances (for example) to save
        # time.
        for i in range(X.shape[0]):
            self._param_grad_helper(dL_dKdiag[i], X[i, :][None, :], X2=None, target=target)
    def psi0(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
        raise NotImplementedError
    def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
        raise NotImplementedError
    def psi1(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi1_dtheta(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
        raise NotImplementedError
    def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
        raise NotImplementedError
    def psi2(self,Z,mu,S,target):
        raise NotImplementedError
    def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
        raise NotImplementedError
    def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
        raise NotImplementedError
    def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
        raise NotImplementedError
    def gradients_X(self, dL_dK, X, X2, target):
        raise NotImplementedError
    def dKdiag_dX(self, dL_dK, X, target):
        raise NotImplementedError
    def update_gradients_full(self, dL_dK, X):
        """Set the gradients of all parameters when doing full (N) inference."""
        raise NotImplementedError
    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
        """Set the gradients of all parameters when doing sparse (M) inference."""
        raise NotImplementedError
    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
        """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
        raise NotImplementedError

class Kernpart_stationary(Kernpart):
    def __init__(self, input_dim, lengthscale=None, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        if not ARD:
            self.num_params = 2
            if lengthscale is not None:
                self.lengthscale = np.asarray(lengthscale)
                assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
            else:
                self.lengthscale = np.ones(1)
        else:
            self.num_params = self.input_dim + 1
            if lengthscale is not None:
                self.lengthscale = np.asarray(lengthscale)
                assert self.lengthscale.size == self.input_dim, "bad number of lengthscales"
            else:
                self.lengthscale = np.ones(self.input_dim)

        # initialize cache
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))

    def _set_params(self, x):
        self.lengthscale = x
        self.lengthscale2 = np.square(self.lengthscale)
        # reset cached results
        self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
        self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S


    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        # For stationary covariances, derivative of diagonal elements
        # wrt lengthscale is 0.
        target[0] += np.sum(dL_dKdiag)

    def dKdiag_dX(self, dL_dK, X, target):
        pass # true for all stationary kernels


class Kernpart_inner(Kernpart):
    def __init__(self,input_dim):
        """
        The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs.

        :param input_dim: the number of input dimensions to the function
        :type input_dim: int

        Do not instantiate.
        """
        Kernpart.__init__(self, input_dim)

        # initialize cache
        self._Z, self._mu, self._S = np.empty(shape=(3, 1))
        self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1))
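To make the legacy contract concrete, here is a hypothetical minimal part written against the now-deleted base class above (BiasPart is an illustration, not a GPy class; it assumes Kernpart as defined in this file). Note that every method writes into a caller-provided target rather than returning a value:

    class BiasPart(Kernpart):
        """Hypothetical constant-covariance part under the legacy convention."""
        def __init__(self, input_dim, variance=1.):
            Kernpart.__init__(self, input_dim, 'bias')
            self.variance = variance

        def K(self, X, X2, target):
            target += self.variance   # constant covariance between all pairs

        def Kdiag(self, X, target):
            target += self.variance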
GPy/kern/parts/prod.py (deleted, 125 lines)
@@ -1,125 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from kernpart import Kernpart
from coregionalize import Coregionalize
import numpy as np
import hashlib

class Prod(Kernpart):
    """
    Computes the product of 2 kernels

    :param k1, k2: the kernels to multiply
    :type k1, k2: Kernpart
    :param tensor: the kernels are either multiplied as functions defined on the same input space (default) or as functions on the product of the input spaces
    :type tensor: Boolean
    :rtype: kernel object

    """
    def __init__(self,k1,k2,tensor=False):
        if tensor:
            super(Prod, self).__init__(k1.input_dim + k2.input_dim, k1.name + '_xx_' + k2.name)
            self.slice1 = slice(0,k1.input_dim)
            self.slice2 = slice(k1.input_dim,k1.input_dim+k2.input_dim)
        else:
            assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to multiply don't have the same dimension."
            super(Prod, self).__init__(k1.input_dim, k1.name + '_x_' + k2.name)
            self.slice1 = slice(0,self.input_dim)
            self.slice2 = slice(0,self.input_dim)
        self.k1 = k1
        self.k2 = k2
        self.add_parameters(self.k1, self.k2)

        #initialize cache
        self._X, self._X2 = np.empty(shape=(2,1))
        self._params = None

    def K(self,X,X2,target):
        self._K_computations(X,X2)
        target += self._K1 * self._K2

    def K1(self,X, X2):
        """Compute the part of the kernel associated with k1."""
        self._K_computations(X, X2)
        return self._K1

    def K2(self, X, X2):
        """Compute the part of the kernel associated with k2."""
        self._K_computations(X, X2)
        return self._K2

    def update_gradients_full(self, dL_dK, X):
        self._K_computations(X, None)
        self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1])
        self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2])

    def _param_grad_helper(self,dL_dK,X,X2,target):
        """Derivative of the covariance matrix with respect to the parameters."""
        self._K_computations(X,X2)
        if X2 is None:
            self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params])
            self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:])
        else:
            self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params])
            self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:])

    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        target1 = np.zeros(X.shape[0])
        target2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],target1)
        self.k2.Kdiag(X[:,self.slice2],target2)
        target += target1 * target2


    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],K1)
        self.k2.Kdiag(X[:,self.slice2],K2)
        self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
        self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])

    def gradients_X(self,dL_dK,X,X2,target):
        """Derivative of the covariance matrix with respect to X."""
        self._K_computations(X,X2)
        if X2 is None:
            if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
                self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
                self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
            else: #if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
                #NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
                X2 = X
                self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
                self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
        else:
            self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
            self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])

    def dKdiag_dX(self, dL_dKdiag, X, target):
        K1 = np.zeros(X.shape[0])
        K2 = np.zeros(X.shape[0])
        self.k1.Kdiag(X[:,self.slice1],K1)
        self.k2.Kdiag(X[:,self.slice2],K2)

        self.k1.gradients_X(dL_dKdiag*K2, X[:,self.slice1], target[:,self.slice1])
        self.k2.gradients_X(dL_dKdiag*K1, X[:,self.slice2], target[:,self.slice2])

    def _K_computations(self,X,X2):
        if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
            self._X = X.copy()
            self._params == self._get_params().copy()
            if X2 is None:
                self._X2 = None
                self._K1 = np.zeros((X.shape[0],X.shape[0]))
                self._K2 = np.zeros((X.shape[0],X.shape[0]))
                self.k1.K(X[:,self.slice1],None,self._K1)
                self.k2.K(X[:,self.slice2],None,self._K2)
            else:
                self._X2 = X2.copy()
                self._K1 = np.zeros((X.shape[0],X2.shape[0]))
                self._K2 = np.zeros((X.shape[0],X2.shape[0]))
                self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
                self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
GPy/models/bayesian_gplvm.py
@@ -57,26 +57,16 @@ class BayesianGPLVM(SparseGP, GPLVM):
         self.init = state.pop()
         SparseGP._setstate(self, state)

-    def dL_dmuS(self):
-        dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi0_dmuS(self.grad_dict['dL_dpsi0'], self.Z, self.X, self.X_variance)
-        dL_dmu_psi1, dL_dS_psi1 = self.kern.dpsi1_dmuS(self.grad_dict['dL_dpsi1'], self.Z, self.X, self.X_variance)
-        dL_dmu_psi2, dL_dS_psi2 = self.kern.dpsi2_dmuS(self.grad_dict['dL_dpsi2'], self.Z, self.X, self.X_variance)
-        dL_dmu = dL_dmu_psi0 + dL_dmu_psi1 + dL_dmu_psi2
-        dL_dS = dL_dS_psi0 + dL_dS_psi1 + dL_dS_psi2
-
-        return dL_dmu, dL_dS
-
     def KL_divergence(self):
         var_mean = np.square(self.X).sum()
         var_S = np.sum(self.X_variance - np.log(self.X_variance))
         return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data

     def parameters_changed(self):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.X_variance, self.Z, self.likelihood, self.Y)
         self._update_gradients_Z(add=False)
         super(BayesianGPLVM, self).parameters_changed()

         self._log_marginal_likelihood -= self.KL_divergence()
-        dL_dmu, dL_dS = self.dL_dmuS()
+        dL_dmu, dL_dS = self.kern.gradients_muS_variational(mu=self.X, S=self.X_variance, Z=self.Z, **self.grad_dict)

         # dL:
         self.q.mean.gradient = dL_dmu
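KL_divergence above is the closed-form KL between the factorised Gaussian posterior q(X) = N(mu, diag(S)) and the standard-normal prior, KL = 0.5 * sum(mu^2 + S - log S - 1). A quick numerical restatement (standalone sketch) showing the model's grouping agrees with that formula:

    import numpy as np

    N, D = 5, 2
    mu = np.random.randn(N, D)          # stands in for self.X
    S = np.random.rand(N, D) + 0.1      # stands in for self.X_variance

    direct = 0.5 * np.sum(np.square(mu) + S - np.log(S) - 1.0)
    grouped = 0.5 * (np.square(mu).sum() + np.sum(S - np.log(S))) - 0.5 * D * N
    assert np.allclose(direct, grouped)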
GPy/models/gp_regression.py
@@ -23,7 +23,7 @@ class GPRegression(GP):
     def __init__(self, X, Y, kernel=None):

         if kernel is None:
-            kernel = kern.rbf(X.shape[1])
+            kernel = kern.RBF(X.shape[1])

         likelihood = likelihoods.Gaussian()
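A minimal end-to-end sketch of the model this default serves, assuming the post-refactor names shown in this diff (GPy.models.GPRegression with the kern.RBF default):

    import numpy as np
    import GPy

    X = np.random.rand(20, 1)
    Y = np.sin(6 * X) + 0.05 * np.random.randn(20, 1)

    m = GPy.models.GPRegression(X, Y)   # kernel defaults to kern.RBF(1) as above
    m.optimize()
    pred = m.predict(np.linspace(0, 1, 50)[:, None])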
GPy/models/mrd.py
@@ -7,7 +7,7 @@ from GPy.util.linalg import PCA
 import numpy
 import itertools
 import pylab
-from GPy.kern.kern import kern
+from GPy.kern import Kern
 from GPy.models.bayesian_gplvm import BayesianGPLVM

 class MRD2(Model):
@@ -64,11 +64,11 @@ class MRD(Model):
         # sort out the kernels
         if kernels is None:
             kernels = [None] * len(likelihood_or_Y_list)
-        elif isinstance(kernels, kern):
+        elif isinstance(kernels, Kern):
             kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))]
         else:
             assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output"
-            assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!"
+            assert all([isinstance(k, Kern) for k in kernels]), "invalid kernel object detected!"
         assert not ('kernel' in kw), "pass kernels through `kernels` argument"

         self.input_dim = input_dim

@@ -7,7 +7,7 @@ import pylab as pb
import Tango
from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy
from ...kern.parts.linear import Linear
from ...kern import Linear


def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):

@@ -9,7 +9,7 @@ from ...util.misc import param_to_array


def plot_fit(model, plot_limits=None, which_data_rows='all',
             which_data_ycols='all', which_parts='all', fixed_inputs=[],
             which_data_ycols='all', fixed_inputs=[],
             levels=20, samples=0, fignum=None, ax=None, resolution=None,
             plot_raw=False,
             linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):

@@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
    - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.

    Can plot only part of the data and part of the posterior functions
    using which_data_rowsm which_data_ycols and which_parts
    using which_data_rows, which_data_ycols.

    :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
    :type plot_limits: np.array

@@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
    :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
    :param which_data_ycols: when the data has several columns (independent outputs), only plot these
    :type which_data_ycols: 'all' or a list of integers
    :param which_parts: which of the kernel functions to plot (additively)
    :type which_parts: 'all', or list of bools
    :param fixed_inputs: a list of tuples [(i,v), (i,v)...], specifying that input index i should be set to value v.
    :type fixed_inputs: a list of tuples
    :param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
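
fixed_inputs in use, as a sketch: for an already fitted two-input model m, plot against input 0 while input 1 is pinned to 0.5, sampling the GP on a 200-point grid. Argument names follow the signature above.

# m is assumed to be a fitted two-input GPy model
plot_fit(m, fixed_inputs=[(1, 0.5)], resolution=200)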

@@ -76,12 +74,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',

    #make a prediction on the frame and plot it
    if plot_raw:
        m, v = model._raw_predict(Xgrid, which_parts=which_parts)
        m, v = model._raw_predict(Xgrid)
        lower = m - 2*np.sqrt(v)
        upper = m + 2*np.sqrt(v)
        Y = model.Y
    else:
        m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts)
        m, v, lower, upper = model.predict(Xgrid)
        Y = model.Y
    for d in which_data_ycols:
        gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)

@@ -89,7 +87,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',

    #optionally plot some samples
    if samples: #NOTE not tested with fixed_inputs
        Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts)
        Ysim = model.posterior_samples(Xgrid, samples)
        for yi in Ysim.T:
            ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
            #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.

@@ -131,10 +129,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',

    #predict on the frame and plot
    if plot_raw:
        m, _ = model._raw_predict(Xgrid, which_parts=which_parts)
        m, _ = model._raw_predict(Xgrid)
        Y = model.Y
    else:
        m, _, _, _ = model.predict(Xgrid, which_parts=which_parts)
        m, _, _, _ = model.predict(Xgrid)
        Y = model.data
    for d in which_data_ycols:
        m_d = m[:,d].reshape(resolution, resolution).T

@@ -1,46 +1,89 @@
from ..core.parameterization.array_core import ObservableArray, ParamList
from ..core.parameterization.parameter_core import Observable
from ..core.parameterization.array_core import ParamList

class Cacher(object):
    def __init__(self, operation, limit=5):
    def __init__(self, operation, limit=5, reset_on_first=False):
        self.limit = int(limit)
        self._reset_on_first = reset_on_first
        self.operation = operation
        self.cached_inputs = ParamList([])
        self.cached_inputs = []
        self.cached_outputs = []
        self.inputs_changed = []

    def __call__(self, X):
        assert isinstance(X, ObservableArray)
        if X in self.cached_inputs:
            i = self.cached_inputs.index(X)
    def __call__(self, *args):
        if self._reset_on_first:
            assert isinstance(args[0], Observable)
            args[0].add_observer(args[0], self.reset)
            cached_args = args
        else:
            cached_args = args[1:]

        if not all([isinstance(arg, Observable) for arg in cached_args]):
            return self.operation(*args)
        if cached_args in self.cached_inputs:
            i = self.cached_inputs.index(cached_args)
            if self.inputs_changed[i]:
                self.cached_outputs[i] = self.operation(X)
                self.cached_outputs[i] = self.operation(*args)
                self.inputs_changed[i] = False
            return self.cached_outputs[i]
        else:
            if len(self.cached_inputs) == self.limit:
                X_ = self.cached_inputs.pop(0)
                X_.remove_observer(self)
                args_ = self.cached_inputs.pop(0)
                [a.remove_observer(self) for a in args_]
                self.inputs_changed.pop(0)
                self.cached_outputs.pop(0)

            self.cached_inputs.append(X)
            self.cached_outputs.append(self.operation(X))
            self.cached_inputs.append(cached_args)
            self.cached_outputs.append(self.operation(*args))
            self.inputs_changed.append(False)
            X.add_observer(self, self.on_cache_changed)
            [a.add_observer(self, self.on_cache_changed) for a in args]
            return self.cached_outputs[-1]

    def on_cache_changed(self, X):
        #print id(X)
        Xbase = X
        while Xbase is not None:
            try:
                i = self.cached_inputs.index(X)
                break
            except ValueError:
                Xbase = X.base
                continue
        self.inputs_changed[i] = True
    def on_cache_changed(self, arg):
        self.inputs_changed = [any([a is arg for a in args]) or old_ic for args, old_ic in zip(self.cached_inputs, self.inputs_changed)]

    def reset(self, obj):
        [[a.remove_observer(self) for a in args] for args in self.cached_inputs]
        self.cached_inputs = []
        self.cached_outputs = []
        self.inputs_changed = []


def cache_this(limit=5, reset_on_self=False):
    def limited_cache(f):
        c = Cacher(f, limit, reset_on_first=reset_on_self)
        def f_wrap(*args):
            return c(*args)
        f_wrap._cacher = c
        return f_wrap
    return limited_cache
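
Intended use of the decorator, as a sketch rather than code from this commit: memoise an expensive method and empty the cache whenever the object itself changes. SomeKern and expensive_covariance are illustrative names; the Cacher requires the instance and the arguments to be Observable (imported above) and silently bypasses the cache for non-Observable arguments.

class SomeKern(Observable):
    @cache_this(limit=3, reset_on_self=True)
    def K(self, X):
        return expensive_covariance(X)  # hypothetical costly computation

# k = SomeKern(); k.K(X); k.K(X)  -- the second call is served from the cache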

@@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500):
    num_in = 1
    X = np.random.uniform(low=-1.0, high=1.0, size=(num_samples, num_in))
    X.sort(axis=0)
    rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,)))
    white = GPy.kern.white(num_in, variance=1e-2)
    rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,)))
    white = GPy.kern.White(num_in, variance=1e-2)
    kernel = rbf + white
    K = kernel.K(X)
    y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1))
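
The toy data are a single draw y ~ N(0, K(X, X)) from the GP prior under the summed RBF + White covariance, so fitting a GPRegression back to them should roughly recover the generating lengthscale (0.25) and noise level (1e-2). A sketch of that round trip; sample size and seed are arbitrary:

import numpy as np
import GPy

np.random.seed(0)
X = np.sort(np.random.uniform(-1., 1., (200, 1)), axis=0)
gen = GPy.kern.RBF(1, variance=1., lengthscale=.25) + GPy.kern.White(1, variance=1e-2)
y = np.random.multivariate_normal(np.zeros(200), gen.K(X))[:, None]

m = GPy.models.GPRegression(X, y, GPy.kern.RBF(1))
m.optimize()
print(m)  # lengthscale and noise variance should land near 0.25 and 1e-2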