diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index 10ba8e6b..2dcf0e14 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -70,7 +70,7 @@ class GP(Model):
     def log_likelihood(self):
         return self._log_marginal_likelihood

-    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
+    def _raw_predict(self, _Xnew, full_cov=False):
        """
        Internal helper function for making predictions, does not account
        for normalization or likelihood
@@ -80,29 +80,27 @@ class GP(Model):
            diagonal of the covariance is returned.

        """
-        Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
+        Kx = self.kern.K(_Xnew, self.X).T
         #LiKx, _ = dtrtrs(self.posterior.woodbury_chol, np.asfortranarray(Kx), lower=1)
         WiKx = np.dot(self.posterior.woodbury_inv, Kx)
         mu = np.dot(Kx.T, self.posterior.woodbury_vector)
         if full_cov:
-            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
+            Kxx = self.kern.K(_Xnew)
             #var = Kxx - tdot(LiKx.T)
             var = Kxx - np.dot(Kx.T, WiKx)
         else:
-            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
+            Kxx = self.kern.Kdiag(_Xnew)
             #var = Kxx - np.sum(LiKx*LiKx, 0)
             var = Kxx - np.sum(WiKx*Kx, 0)
             var = var.reshape(-1, 1)
         return mu, var

-    def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
+    def predict(self, Xnew, full_cov=False, **likelihood_args):
        """
        Predict the function(s) at the new point(s) Xnew.

        :param Xnew: The points at which to make a prediction
        :type Xnew: np.ndarray, Nnew x self.input_dim
-       :param which_parts: specifies which outputs kernel(s) to use in prediction
-       :type which_parts: ('all', list of bools)
        :param full_cov: whether to return the full covariance matrix, or just the diagonal
        :type full_cov: bool
@@ -118,13 +116,13 @@ class GP(Model):
        """
        #predict the latent function values
-        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov)

         # now push through likelihood
         mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)

         return mean, var, _025pm, _975pm

-    def posterior_samples_f(self,X,size=10,which_parts='all',full_cov=True):
+    def posterior_samples_f(self, X, size=10, full_cov=True):
        """
        Samples the posterior GP at the points X.
@@ -132,13 +130,11 @@ class GP(Model):
        :param X: the points at which to take the samples.
        :type X: np.ndarray, Nnew x self.input_dim.
        :param size: the number of a posteriori samples.
        :type size: int.
-       :param which_parts: which of the kernel functions to use (additively).
-       :type which_parts: 'all', or list of bools.
        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
        :type full_cov: bool.
        :returns: Ysim: set of simulations, a Numpy array (N x samples).
        """
-        m, v = self._raw_predict(X, which_parts=which_parts, full_cov=full_cov)
+        m, v = self._raw_predict(X, full_cov=full_cov)
         v = v.reshape(m.size,-1) if len(v.shape)==3 else v
         if not full_cov:
             Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
@@ -147,7 +143,7 @@ class GP(Model):

         return Ysim

-    def posterior_samples(self,X,size=10,which_parts='all',full_cov=True,noise_model=None):
+    def posterior_samples(self, X, size=10, full_cov=True, noise_model=None):
        """
        Samples the posterior GP at the points X.
@@ -155,15 +151,13 @@ class GP(Model):
        :param X: the points at which to take the samples.
        :type X: np.ndarray, Nnew x self.input_dim.
        :param size: the number of a posteriori samples.
        :type size: int.
-       :param which_parts: which of the kernel functions to use (additively).
-       :type which_parts: 'all', or list of bools.
        :param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool. :param noise_model: for mixed noise likelihood, the noise model to use in the samples. :type noise_model: integer. :returns: Ysim: set of simulations, a Numpy array (N x samples). """ - Ysim = self.posterior_samples_f(X, size, which_parts=which_parts, full_cov=full_cov) + Ysim = self.posterior_samples_f(X, size, full_cov=full_cov) if isinstance(self.likelihood, Gaussian): noise_std = np.sqrt(self.likelihood._get_params()) Ysim += np.random.normal(0,noise_std,Ysim.shape) diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 55567051..5cac1857 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -41,7 +41,7 @@ def coregionalization_toy2(optimize=True, plot=True): Y = np.vstack((Y1, Y2)) #build the kernel - k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) + k1 = GPy.kern.RBF(1) + GPy.kern.bias(1) k2 = GPy.kern.coregionalize(2,1) k = k1**k2 m = GPy.models.GPRegression(X, Y, kernel=k) @@ -68,7 +68,7 @@ def coregionalization_toy2(optimize=True, plot=True): # Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05 # Y = np.vstack((Y1, Y2)) # -# k1 = GPy.kern.rbf(1) +# k1 = GPy.kern.RBF(1) # m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1]) # m.constrain_fixed('.*rbf_var', 1.) # m.optimize(max_iters=100) @@ -127,7 +127,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True): Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None], np.random.randint(0, 4, num_inducing)[:, None])) - k1 = GPy.kern.rbf(1) + k1 = GPy.kern.RBF(1) k2 = GPy.kern.coregionalize(output_dim=5, rank=5) k = k1**k2 @@ -156,7 +156,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 data['Y'] = data['Y'] - np.mean(data['Y']) - lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) + lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF) if plot: pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) ax = pb.gca() @@ -172,8 +172,8 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 optim_point_y = np.empty(2) np.random.seed(seed=seed) for i in range(0, model_restarts): - # kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) - kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) + # kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern) m['noise_variance'] = np.random.uniform(1e-3, 1) @@ -196,7 +196,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000 ax.set_ylim(ylim) return m # (models, lls) -def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): +def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF): """ Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales. 
@@ -278,10 +278,10 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
     optimizer='scg'
     x_len = 30
     X = np.linspace(0, 10, x_len)[:, None]
-    f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X))
+    f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X))
     Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]

-    kern = GPy.kern.rbf(1)
+    kern = GPy.kern.RBF(1)
     poisson_lik = GPy.likelihoods.Poisson()
     laplace_inf = GPy.inference.latent_function_inference.LaplaceInference()
@@ -319,10 +319,10 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
     if kernel_type == 'linear':
         kernel = GPy.kern.linear(X.shape[1], ARD=1)
     elif kernel_type == 'rbf_inv':
-        kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
+        kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
     else:
-        kernel = GPy.kern.rbf(X.shape[1], ARD=1)
-    kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
+        kernel = GPy.kern.RBF(X.shape[1], ARD=1)
+    kernel += GPy.kern.White(X.shape[1]) + GPy.kern.bias(X.shape[1])
     m = GPy.models.GPRegression(X, Y, kernel)
     # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
     # m.set_prior('.*lengthscale',len_prior)
@@ -358,9 +358,9 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
     if kernel_type == 'linear':
         kernel = GPy.kern.linear(X.shape[1], ARD=1)
     elif kernel_type == 'rbf_inv':
-        kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1)
+        kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
     else:
-        kernel = GPy.kern.rbf(X.shape[1], ARD=1)
+        kernel = GPy.kern.RBF(X.shape[1], ARD=1)
     #kernel += GPy.kern.bias(X.shape[1])
     X_variance = np.ones(X.shape) * 0.5
     m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
@@ -421,7 +421,7 @@ def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, opti
     X = np.random.uniform(-3., 3., (num_samples, 1))
     Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
     # construct kernel
-    rbf = GPy.kern.rbf(1)
+    rbf = GPy.kern.RBF(1)
     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
     m.checkgrad(verbose=1)
@@ -444,7 +444,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
     Y[inan] = np.nan

     # construct kernel
-    rbf = GPy.kern.rbf(2)
+    rbf = GPy.kern.RBF(2)
     # create simple GP Model
     m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
@@ -476,9 +476,9 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
     # likelihood = GPy.likelihoods.Gaussian(Y)
     Z = np.random.uniform(-3., 3., (7, 1))

-    k = GPy.kern.rbf(1)
+    k = GPy.kern.RBF(1)
     # create simple GP Model - no input uncertainty on this one
-    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z)
+    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z)

     if optimize:
         m.optimize('scg', messages=1, max_iters=max_iters)
@@ -489,7 +489,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
     print m

     # the same Model with uncertainty
-    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.rbf(1), Z=Z, X_variance=S)
+    m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
     if optimize:
         m.optimize('scg', messages=1, max_iters=max_iters)
     if plot:
diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py
index 7760f48f..214e230f 100644
--- a/GPy/kern/__init__.py
+++ b/GPy/kern/__init__.py
@@ -1,6 +1,7 @@
 from _src.rbf import RBF
 from _src.white import White
 from _src.kern import Kern
+from _src.linear import Linear
 #import bias
 #import Brownian
 #import coregionalize
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 8d916941..8d81674b 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -5,8 +5,8 @@ import sys
 import numpy as np
 import itertools
 from linear import Linear
-from ..core.parameterization import Parameterized
-from GPy.core.parameterization.param import Param
+from ...core.parameterization import Parameterized
+from ...core.parameterization.param import Param
 from kern import Kern

 class Add(Kern):
@@ -27,7 +27,7 @@ class Add(Kern):

         self.add_parameters(*subkerns)

-    def K(self, X, X2=None, which_parts='all'):
+    def K(self, X, X2=None):
        """
        Compute the kernel function.

        :param X2: (optional) the second set of arguments to the kernel.
            If X2 is None, this is passed through to the 'part' object, which
            handles this as X2 == X.
-       :param which_parts: a list of booleans detailing whether to include
-                           each of the part functions. By default, 'all'
-                           indicates all parts
        """
-        if which_parts == 'all':
-            which_parts = [True] * self.size
         assert X.shape[1] == self.input_dim
         if X2 is None:
-            target = np.zeros((X.shape[0], X.shape[0]))
-            [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
+            return sum([p.K(X[:, i_s], None) for p, i_s in zip(self._parameters_, self.input_slices)])
         else:
-            target = np.zeros((X.shape[0], X2.shape[0]))
-            [p.K(X[:, i_s], X2[:, i_s], target=target) for p, i_s, part_i_used in zip(self._parameters_, self.input_slices, which_parts) if part_i_used]
-        return target
+            return sum([p.K(X[:, i_s], X2[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])

     def update_gradients_full(self, dL_dK, X):
-        [p.update_gradients_full(dL_dK, X) for p in self._parameters_]
+        [p.update_gradients_full(dL_dK, X[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

     def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X, Z) for p in self._parameters_]
+        [p.update_gradients_sparse(dL_dKmm, dL_dKnm, dL_dKdiag, X[:,i_s], Z[:,i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]

     def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
         [p.update_gradients_variational(dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z) for p in self._parameters_]

-    def _param_grad_helper(self, dL_dK, X, X2=None):
-        """
-        Compute the gradient of the covariance function with respect to the parameters.
-
-        :param dL_dK: An array of gradients of the objective function with respect to the covariance function.
-        :type dL_dK: Np.ndarray (num_samples x num_inducing)
-        :param X: Observed data inputs
-        :type X: np.ndarray (num_samples x input_dim)
-        :param X2: Observed data inputs (optional, defaults to X)
-        :type X2: np.ndarray (num_inducing x input_dim)
-
-        returns: dL_dtheta
-        """
-        assert X.shape[1] == self.input_dim
-        target = np.zeros(self.size)
-        if X2 is None:
-            [p._param_grad_helper(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
-        else:
-            [p._param_grad_helper(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self._parameters_, self.input_slices, self._param_slices_)]
-
-        return self._transform_gradients(target)
-
     def gradients_X(self, dL_dK, X, X2=None):
         """Compute the gradient of the objective function with respect to X.
@@ -93,33 +63,15 @@ class Add(Kern):
         target = np.zeros_like(X)
         if X2 is None:
-            [p.gradients_X(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+            [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], None), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
         else:
-            [p.gradients_X(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
+            [np.add(target[:,i_s], p.gradients_X(dL_dK, X[:, i_s], X2[:,i_s]), target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
         return target

-    def Kdiag(self, X, which_parts='all'):
+    def Kdiag(self, X):
         """Compute the diagonal of the covariance function for inputs X."""
-        if which_parts == 'all':
-            which_parts = [True] * self.size
         assert X.shape[1] == self.input_dim
-        target = np.zeros(X.shape[0])
-        [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self._parameters_, self.input_slices, which_parts) if part_on]
-        return target
-
-    def dKdiag_dtheta(self, dL_dKdiag, X):
-        """Compute the gradient of the diagonal of the covariance function with respect to the parameters."""
-        assert X.shape[1] == self.input_dim
-        assert dL_dKdiag.size == X.shape[0]
-        target = np.zeros(self.size)
-        [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self._parameters_, self.input_slices, self._param_slices_)]
-        return self._transform_gradients(target)
-
-    def dKdiag_dX(self, dL_dKdiag, X):
-        assert X.shape[1] == self.input_dim
-        target = np.zeros_like(X)
-        [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)]
-        return target
+        return sum([p.Kdiag(X[:, i_s]) for p, i_s in zip(self._parameters_, self.input_slices)])

     def psi0(self, Z, mu, S):
         target = np.zeros(mu.shape[0])
diff --git a/GPy/kern/_src/coregionalize.py b/GPy/kern/_src/coregionalize.py
index 8b2f17e8..69fc27ef 100644
--- a/GPy/kern/_src/coregionalize.py
+++ b/GPy/kern/_src/coregionalize.py
@@ -1,12 +1,12 @@
 # Copyright (c) 2012, James Hensman and Ricardo Andrade
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from kernpart import Kernpart
+from kern import Kern
 import numpy as np
 from scipy import weave
 from ...core.parameterization import Param

-class Coregionalize(Kernpart):
+class Coregionalize(Kern):
     """
     Covariance function for intrinsic/linear coregionalization models
@@ -133,6 +133,6 @@ class Coregionalize(Kernpart):
         #dkappa = dL_dKdiag_small
         #target += np.hstack([dW.flatten(),dkappa])

-    def gradients_X(self,dL_dK,X,X2,target):
-        #NOTE In this case, pass is equivalent to returning zero.
-        pass
+    def gradients_X(self,dL_dK,X,X2):
+        #NOTE the inputs are discrete indices here, so the gradient is zero for every entry of X.
+        return np.zeros(X.shape)
diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py
index af362498..b5b84305 100644
--- a/GPy/kern/_src/kern.py
+++ b/GPy/kern/_src/kern.py
@@ -4,8 +4,8 @@ import sys
 import numpy as np
 import itertools

-from ..core.parameterization import Parameterized
-from GPy.core.parameterization.param import Param
+from ...core.parameterization import Parameterized
+from ...core.parameterization.param import Param

 class Kern(Parameterized):

diff --git a/GPy/kern/_src/kernpart.py b/GPy/kern/_src/kernpart.py
deleted file mode 100644
index 097ed741..00000000
--- a/GPy/kern/_src/kernpart.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt) -#from ...core.parameterized.Parameterized import set_as_parameter -from ...core.parameterization import Parameterized - -class Kernpart_stationary(Kernpart): - def __init__(self, input_dim, lengthscale=None, ARD=False): - self.input_dim = input_dim - self.ARD = ARD - if not ARD: - self.num_params = 2 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" - else: - self.lengthscale = np.ones(1) - else: - self.num_params = self.input_dim + 1 - if lengthscale is not None: - self.lengthscale = np.asarray(lengthscale) - assert self.lengthscale.size == self.input_dim, "bad number of lengthscales" - else: - self.lengthscale = np.ones(self.input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - - def _set_params(self, x): - self.lengthscale = x - self.lengthscale2 = np.square(self.lengthscale) - # reset cached results - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S - - - def dKdiag_dtheta(self, dL_dKdiag, X, target): - # For stationary covariances, derivative of diagonal elements - # wrt lengthscale is 0. - target[0] += np.sum(dL_dKdiag) - - def dKdiag_dX(self, dL_dK, X, target): - pass # true for all stationary kernels - - -class Kernpart_inner(Kernpart): - def __init__(self,input_dim): - """ - The base class for a kernpart_inner: a positive definite function which forms part of a kernel that is based on the inner product between inputs. - - :param input_dim: the number of input dimensions to the function - :type input_dim: int - - Do not instantiate. 
- """ - Kernpart.__init__(self, input_dim) - - # initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3, 1)) - self._X, self._X2, self._parameters_ = np.empty(shape=(3, 1)) diff --git a/GPy/kern/_src/linear.py b/GPy/kern/_src/linear.py index ab77d4e6..5083c8de 100644 --- a/GPy/kern/_src/linear.py +++ b/GPy/kern/_src/linear.py @@ -5,10 +5,10 @@ import numpy as np from scipy import weave from kern import Kern -from ..util.linalg import tdot -from ..util.misc import fast_array_equal, param_to_array -from ..core.parameterization import Param -from ..core.parameterization.transformations import Logexp +from ...util.linalg import tdot +from ...util.misc import fast_array_equal, param_to_array +from ...core.parameterization import Param +from ...core.parameterization.transformations import Logexp class Linear(Kern): """ diff --git a/GPy/kern/_src/prod.py b/GPy/kern/_src/prod.py index 08221de7..e0d069b2 100644 --- a/GPy/kern/_src/prod.py +++ b/GPy/kern/_src/prod.py @@ -35,64 +35,36 @@ class Prod(Kern): self._X, self._X2 = np.empty(shape=(2,1)) self._params = None - def K(self,X,X2,target): + def K(self, X, X2=None): self._K_computations(X,X2) - target += self._K1 * self._K2 - - def K1(self,X, X2): - """Compute the part of the kernel associated with k1.""" - self._K_computations(X, X2) - return self._K1 - - def K2(self, X, X2): - """Compute the part of the kernel associated with k2.""" - self._K_computations(X, X2) - return self._K2 + return self._K1 * self._K2 def update_gradients_full(self, dL_dK, X): self._K_computations(X, None) self.k1.update_gradients_full(dL_dK*self._K2, X[:,self.slice1]) self.k2.update_gradients_full(dL_dK*self._K1, X[:,self.slice2]) - def _param_grad_helper(self,dL_dK,X,X2,target): - """Derivative of the covariance matrix with respect to the parameters.""" - self._K_computations(X,X2) - if X2 is None: - self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params]) - self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:]) - else: - self.k1._param_grad_helper(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params]) - self.k2._param_grad_helper(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:]) - - def Kdiag(self,X,target): + def Kdiag(self, X): """Compute the diagonal of the covariance matrix associated to X.""" - target1 = np.zeros(X.shape[0]) - target2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,self.slice1],target1) - self.k2.Kdiag(X[:,self.slice2],target2) - target += target1 * target2 + return self.k1.Kdiag(X[:,self.slice1]) * self.k2.Kdiag(X[:,self.slice2]) + def update_gradients_sparse(self): + pass + #wtf goes here?? 
+    #def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        #K1 = np.zeros(X.shape[0])
+        #K2 = np.zeros(X.shape[0])
+        #self.k1.Kdiag(X[:,self.slice1],K1)
+        #self.k2.Kdiag(X[:,self.slice2],K2)
+        #self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
+        #self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])

-    def dKdiag_dtheta(self,dL_dKdiag,X,target):
-        K1 = np.zeros(X.shape[0])
-        K2 = np.zeros(X.shape[0])
-        self.k1.Kdiag(X[:,self.slice1],K1)
-        self.k2.Kdiag(X[:,self.slice2],K2)
-        self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params])
-        self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:])
-
-    def gradients_X(self,dL_dK,X,X2,target):
+    def gradients_X(self,dL_dK,X,X2):
         """derivative of the covariance matrix with respect to X."""
         self._K_computations(X,X2)
+        target = np.zeros_like(X)
         if X2 is None:
-            if not isinstance(self.k1,Coregionalize) and not isinstance(self.k2,Coregionalize):
-                self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None, target[:,self.slice1])
-                self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None, target[:,self.slice2])
-            else:#if isinstance(self.k1,Coregionalize) or isinstance(self.k2,Coregionalize):
-                #NOTE The indices column in the inputs makes the ki.gradients_X fail when passing None instead of X[:,self.slicei]
-                X2 = X
-                self.k1.gradients_X(2.*dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
-                self.k2.gradients_X(2.*dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], None)
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], None)
         else:
-            self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:,self.slice1])
-            self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[:,self.slice2])
+            target[:,self.slice1] += self.k1.gradients_X(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1])
+            target[:,self.slice2] += self.k2.gradients_X(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2])
+        return target
@@ -112,14 +84,10 @@ class Prod(Kern):
             self._params = self._get_params().copy()
             if X2 is None:
                 self._X2 = None
-                self._K1 = np.zeros((X.shape[0],X.shape[0]))
-                self._K2 = np.zeros((X.shape[0],X.shape[0]))
-                self.k1.K(X[:,self.slice1],None,self._K1)
-                self.k2.K(X[:,self.slice2],None,self._K2)
+                self._K1 = self.k1.K(X[:,self.slice1],None)
+                self._K2 = self.k2.K(X[:,self.slice2],None)
             else:
                 self._X2 = X2.copy()
-                self._K1 = np.zeros((X.shape[0],X2.shape[0]))
-                self._K2 = np.zeros((X.shape[0],X2.shape[0]))
-                self.k1.K(X[:,self.slice1],X2[:,self.slice1],self._K1)
-                self.k2.K(X[:,self.slice2],X2[:,self.slice2],self._K2)
+                self._K1 = self.k1.K(X[:,self.slice1],X2[:,self.slice1])
+                self._K2 = self.k2.K(X[:,self.slice2],X2[:,self.slice2])
diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py
index 36e454e3..eb713433 100644
--- a/GPy/kern/_src/rbf.py
+++ b/GPy/kern/_src/rbf.py
@@ -5,10 +5,10 @@ import numpy as np
 from scipy import weave

 from kern import Kern
-from ..util.linalg import tdot
-from ..util.misc import fast_array_equal, param_to_array
-from ..core.parameterization import Param
-from ..core.parameterization.transformations import Logexp
+from ...util.linalg import tdot
+from ...util.misc import fast_array_equal, param_to_array
+from ...core.parameterization import Param
+from ...core.parameterization.transformations import Logexp

 class RBF(Kern):
     """
diff --git a/GPy/kern/_src/white.py b/GPy/kern/_src/white.py
index 7750267f..2be73389 100644
--- a/GPy/kern/_src/white.py
+++ b/GPy/kern/_src/white.py
@@ -3,8 +3,8 @@
 from kern import Kern
 import numpy as np
-from ..core.parameterization import Param
-from ..core.parameterization.transformations import Logexp
+from ...core.parameterization import Param
+from ...core.parameterization.transformations import Logexp

 class White(Kern):
     """
@@ -25,6 +25,8 @@ class White(Kern):
     def K(self,X,X2):
         if X2 is None:
             return np.eye(X.shape[0])*self.variance
+        else:
+            return np.zeros((X.shape[0], X2.shape[0]))

     def Kdiag(self,X):
         ret = np.ones(X.shape[0])
diff --git a/GPy/models/gp_regression.py b/GPy/models/gp_regression.py
index a72acc1a..f8957906 100644
--- a/GPy/models/gp_regression.py
+++ b/GPy/models/gp_regression.py
@@ -23,7 +23,7 @@ class GPRegression(GP):

     def __init__(self, X, Y, kernel=None):
         if kernel is None:
-            kernel = kern.rbf(X.shape[1])
+            kernel = kern.RBF(X.shape[1])

         likelihood = likelihoods.Gaussian()
diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py
index b4f987ea..acc6e11a 100644
--- a/GPy/models/mrd.py
+++ b/GPy/models/mrd.py
@@ -7,7 +7,7 @@ from GPy.util.linalg import PCA
 import numpy
 import itertools
 import pylab

-from GPy.kern.kern import Kern
+from GPy.kern import Kern
 from GPy.models.bayesian_gplvm import BayesianGPLVM

 class MRD(Model):
diff --git a/GPy/plotting/matplot_dep/kernel_plots.py b/GPy/plotting/matplot_dep/kernel_plots.py
index 80350475..30157294 100644
--- a/GPy/plotting/matplot_dep/kernel_plots.py
+++ b/GPy/plotting/matplot_dep/kernel_plots.py
@@ -7,7 +7,7 @@ import pylab as pb
 import Tango
 from matplotlib.textpath import TextPath
 from matplotlib.transforms import offset_copy
-from ...kern.linear import Linear
+from ...kern import Linear

 def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py
index c9896116..75ba39d9 100644
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@@ -9,7 +9,7 @@ from ...util.misc import param_to_array

 def plot_fit(model, plot_limits=None, which_data_rows='all',
-        which_data_ycols='all', which_parts='all', fixed_inputs=[],
+        which_data_ycols='all', fixed_inputs=[],
         levels=20, samples=0, fignum=None, ax=None, resolution=None,
         plot_raw=False,
         linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
@@ -20,7 +20,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
       - In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.

     Can plot only part of the data and part of the posterior functions
-    using which_data_rowsm which_data_ycols and which_parts
+    using which_data_rows, which_data_ycols.

     :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
     :type plot_limits: np.array
@@ -28,8 +28,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     :type which_data_rows: 'all' or a slice object to slice model.X, model.Y
     :param which_data_ycols: when the data has several columns (independant outputs), only plot these
     :type which_data_rows: 'all' or a list of integers
-    :param which_parts: which of the kernel functions to plot (additively)
-    :type which_parts: 'all', or list of bools
     :param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
     :type fixed_inputs: a list of tuples
     :param resolution: the number of intervals to sample the GP on.
Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D @@ -76,12 +74,12 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #make a prediction on the frame and plot it if plot_raw: - m, v = model._raw_predict(Xgrid, which_parts=which_parts) + m, v = model._raw_predict(Xgrid) lower = m - 2*np.sqrt(v) upper = m + 2*np.sqrt(v) Y = model.Y else: - m, v, lower, upper = model.predict(Xgrid, which_parts=which_parts) + m, v, lower, upper = model.predict(Xgrid) Y = model.Y for d in which_data_ycols: gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol) @@ -89,7 +87,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #optionally plot some samples if samples: #NOTE not tested with fixed_inputs - Ysim = model.posterior_samples(Xgrid, samples, which_parts=which_parts) + Ysim = model.posterior_samples(Xgrid, samples) for yi in Ysim.T: ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25) #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs. @@ -131,10 +129,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all', #predict on the frame and plot if plot_raw: - m, _ = model._raw_predict(Xgrid, which_parts=which_parts) + m, _ = model._raw_predict(Xgrid) Y = model.Y else: - m, _, _, _ = model.predict(Xgrid, which_parts=which_parts) + m, _, _, _ = model.predict(Xgrid) Y = model.data for d in which_data_ycols: m_d = m[:,d].reshape(resolution, resolution).T diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 059a39c3..23f5d0c8 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -513,8 +513,8 @@ def toy_rbf_1d(seed=default_seed, num_samples=500): num_in = 1 X = np.random.uniform(low= -1.0, high=1.0, size=(num_samples, num_in)) X.sort(axis=0) - rbf = GPy.kern.rbf(num_in, variance=1., lengthscale=np.array((0.25,))) - white = GPy.kern.white(num_in, variance=1e-2) + rbf = GPy.kern.RBF(num_in, variance=1., lengthscale=np.array((0.25,))) + white = GPy.kern.White(num_in, variance=1e-2) kernel = rbf + white K = kernel.K(X) y = np.reshape(np.random.multivariate_normal(np.zeros(num_samples), K), (num_samples, 1))
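
Note on the predictive equations touched in GPy/core/gp.py above: the Woodbury-form identities used by _raw_predict can be sanity-checked with plain numpy. The sketch below is illustrative only; it assumes a Gaussian likelihood, so that posterior.woodbury_inv plays the role of (K + sigma^2 I)^-1 and posterior.woodbury_vector the role of (K + sigma^2 I)^-1 y, and the rbf helper is a hypothetical stand-in for the kernel, not GPy's RBF class.

import numpy as np

def rbf(A, B, variance=1.0, lengthscale=1.0):
    # squared-exponential covariance between two sets of 1-d inputs
    return variance * np.exp(-0.5 * (A - B.T) ** 2 / lengthscale ** 2)

np.random.seed(0)
X = np.sort(np.random.uniform(-3., 3., (40, 1)), axis=0)
Y = np.sin(X) + np.random.randn(40, 1) * 0.05
noise = 0.05 ** 2

woodbury_inv = np.linalg.inv(rbf(X, X) + noise * np.eye(40))  # (K + sigma^2 I)^{-1}
woodbury_vector = woodbury_inv.dot(Y)                         # (K + sigma^2 I)^{-1} y

Xnew = np.linspace(-3., 3., 5)[:, None]
Kx = rbf(X, Xnew)                      # K(X, Xnew); the code above stores this transposed
WiKx = woodbury_inv.dot(Kx)
mu = Kx.T.dot(woodbury_vector)         # posterior mean, as in _raw_predict

# full_cov=True branch: var = Kxx - Kx^T (K + sigma^2 I)^{-1} Kx
var_full = rbf(Xnew, Xnew) - Kx.T.dot(WiKx)
# full_cov=False branch: the same quantity, diagonal entries only
var_diag = np.diag(rbf(Xnew, Xnew)) - np.sum(WiKx * Kx, 0)
assert np.allclose(np.diag(var_full), var_diag)

The assertion checks that the diagonal branch (np.sum(WiKx*Kx, 0)) agrees with the diagonal of the full-covariance branch, which is why both branches subtract the cross-term from Kxx.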