diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index 0ea6b9fc..10dc55d7 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -1,9 +1,9 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from GP import GP -from sparse_GP import sparse_GP -from FITC import FITC from model import * from parameterised import * import priors +from GPy.core.gp import GP +from GPy.core.sparse_gp import SparseGP +from FITC import FITC diff --git a/GPy/core/GP.py b/GPy/core/gp.py similarity index 91% rename from GPy/core/GP.py rename to GPy/core/gp.py index 04ea7af1..19bd84ed 100644 --- a/GPy/core/GP.py +++ b/GPy/core/gp.py @@ -46,12 +46,12 @@ class GP(GPBase): #alpha = np.dot(self.Ki, self.likelihood.Y) alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1) - self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki) + self.dL_dK = 0.5 * (tdot(alpha) - self.input_dim * self.Ki) else: #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1) - self.dL_dK = 0.5 * (tmp - self.D * self.Ki) + self.dL_dK = 0.5 * (tmp - self.input_dim * self.Ki) def _get_params(self): return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) @@ -89,7 +89,7 @@ class GP(GPBase): model for a new variable Y* = v_tilde/tau_tilde, with a covariance matrix K* = K + diag(1./tau_tilde) plus a normalization term. """ - return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z + return -0.5 * self.input_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z def _log_likelihood_gradients(self): @@ -117,7 +117,7 @@ class GP(GPBase): var = Kxx - np.sum(np.multiply(KiKx, Kx), 0) var = var[:, None] if stop: - debug_this + debug_this # @UndefinedVariable return mu, var def predict(self, Xnew, which_parts='all', full_cov=False): @@ -131,12 +131,12 @@ class GP(GPBase): :type which_parts: ('all', list of bools) :param full_cov: whether to return the folll covariance matrix, or just the diagonal :type full_cov: bool - :rtype: posterior mean, a Numpy array, Nnew x self.D + :rtype: posterior mean, a Numpy array, Nnew x self.input_dim :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise - :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D + :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim - If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. + If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. This is to allow for different normalizations of the output dimensions. 
""" diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index aa71b550..11b4726b 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -18,15 +18,15 @@ class GPBase(model.model): self.kern = kernel self.likelihood = likelihood assert self.X.shape[0] == self.likelihood.data.shape[0] - self.N, self.D = self.likelihood.data.shape + self.N, self.output_dim = self.likelihood.data.shape if normalize_X: self._Xmean = X.mean(0)[None, :] self._Xstd = X.std(0)[None, :] self.X = (X.copy() - self._Xmean) / self._Xstd else: - self._Xmean = np.zeros((1,self.input_dim)) - self._Xstd = np.ones((1,self.input_dim)) + self._Xmean = np.zeros((1, self.input_dim)) + self._Xstd = np.ones((1, self.input_dim)) model.model.__init__(self) @@ -70,7 +70,7 @@ class GPBase(model.model): else: m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True) Ysim = np.random.multivariate_normal(m.flatten(), v, samples) - gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None,], axes=ax) + gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax) for i in range(samples): ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25) ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) @@ -108,19 +108,19 @@ class GPBase(model.model): if self.X.shape[1] == 1: - Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now + Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) m, var, lower, upper = self.predict(Xnew, which_parts=which_parts) for d in range(m.shape[1]): - gpplot(Xnew, m[:,d], lower[:,d], upper[:,d],axes=ax) - ax.plot(Xu[which_data], self.likelihood.data[which_data,d], 'kx', mew=1.5) + gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) + ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5) ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) - elif self.X.shape[1] == 2: # FIXME + elif self.X.shape[1] == 2: # FIXME resolution = resolution or 50 Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) diff --git a/GPy/core/model.py b/GPy/core/model.py index f9dcaae4..19e38080 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -23,7 +23,7 @@ class model(parameterised): self.priors = None self.optimization_runs = [] self.sampling_runs = [] - self.preferred_optimizer = 'tnc' + self.preferred_optimizer = 'scg' #self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes def _get_params(self): raise NotImplementedError, "this needs to be implemented to use the model class" diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index c8d8ce4d..e091e820 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -85,7 +85,7 @@ class parameterised(object): else: return self._get_params()[matches] else: - raise AttributeError, "no parameter matches %s" % name + raise AttributeError, "no parameter matches %s" % regexp def __setitem__(self, name, val): """ diff --git a/GPy/core/priors.py b/GPy/core/priors.py index 7b6379de..43090ae3 100644 --- a/GPy/core/priors.py +++ b/GPy/core/priors.py @@ -99,9 +99,9 @@ class 
MultivariateGaussian: assert len(self.var.shape) == 2 assert self.var.shape[0] == self.var.shape[1] assert self.var.shape[0] == self.mu.size - self.D = self.mu.size + self.input_dim = self.mu.size self.inv, self.hld = pdinv(self.var) - self.constant = -0.5 * self.D * np.log(2 * np.pi) - self.hld + self.constant = -0.5 * self.input_dim * np.log(2 * np.pi) - self.hld def summary(self): raise NotImplementedError @@ -121,7 +121,7 @@ class MultivariateGaussian: return np.random.multivariate_normal(self.mu, self.var, n) def plot(self): - if self.D == 2: + if self.input_dim == 2: rvs = self.rvs(200) pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5) xmin, xmax = pb.xlim() diff --git a/GPy/core/sparse_GP.py b/GPy/core/sparse_gp.py similarity index 85% rename from GPy/core/sparse_GP.py rename to GPy/core/sparse_gp.py index c4fe6763..26870927 100644 --- a/GPy/core/sparse_GP.py +++ b/GPy/core/sparse_gp.py @@ -8,7 +8,7 @@ from scipy import linalg from ..likelihoods import Gaussian from gp_base import GPBase -class sparse_GP(GPBase): +class SparseGP(GPBase): """ Variational sparse GP model @@ -21,9 +21,9 @@ class sparse_GP(GPBase): :param X_variance: The uncertainty in the measurements of X (Gaussian variance) :type X_variance: np.ndarray (N x input_dim) | None :param Z: inducing inputs (optional, see note) - :type Z: np.ndarray (M x input_dim) | None - :param M : Number of inducing points (optional, default 10. Ignored if Z is not None) - :type M: int + :type Z: np.ndarray (num_inducing x input_dim) | None + :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None) + :type num_inducing: int :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :type normalize_(X|Y): bool """ @@ -32,7 +32,7 @@ class sparse_GP(GPBase): GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self.Z = Z - self.M = Z.shape[0] + self.num_inducing = Z.shape[0] self.likelihood = likelihood if X_variance is None: @@ -85,7 +85,7 @@ class sparse_GP(GPBase): # factor B - self.B = np.eye(self.M) + self.A + self.B = np.eye(self.num_inducing) + self.A self.LB = jitchol(self.B) # TODO: make a switch for either first compute psi1V, or VV.T @@ -99,16 +99,16 @@ class sparse_GP(GPBase): # Compute dL_dKmm tmp = tdot(self._LBi_Lmi_psi1V) - self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp) + self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.input_dim * np.eye(self.num_inducing) + tmp) tmp = -0.5 * self.DBi_plus_BiPBi - tmp += -0.5 * self.B * self.D - tmp += self.D * np.eye(self.M) + tmp += -0.5 * self.B * self.input_dim + tmp += self.input_dim * np.eye(self.num_inducing) self.dL_dKmm = backsub_both_sides(self.Lm, tmp) # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case - self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten() + self.dL_dpsi0 = -0.5 * self.input_dim * (self.likelihood.precision * np.ones([self.N, 1])).flatten() self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T) - dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi) + dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.input_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi) if self.likelihood.is_heteroscedastic: if self.has_uncertain_inputs: @@ -135,24 +135,24 @@ class sparse_GP(GPBase): raise NotImplementedError, "heteroscedatic derivates not implemented" else: # likelihood is not heterscedatic - 
self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 - self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) + self.partial_for_likelihood = -0.5 * self.N * self.input_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 + self.partial_for_likelihood += 0.5 * self.input_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ if self.likelihood.is_heteroscedastic: - A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) - B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) + A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) + B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) else: - A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT - B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) - C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) + A = -0.5 * self.N * self.input_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT + B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) + C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) return A + B + C + D + self.likelihood.Z def _set_params(self, p): - self.Z = p[:self.M * self.input_dim].reshape(self.M, self.input_dim) + self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim) self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) self._compute_kernel_matrices() @@ -221,7 +221,7 @@ class sparse_GP(GPBase): Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! 
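The SparseGP base class being renamed in this hunk is what the user-facing sparse regression model wraps. As a usage-level illustration, a minimal sketch mirroring the sparse_GP_regression_1D example further down in this diff; the random toy data and the chosen number of inducing points are stand-ins, not part of the change:

import numpy as np
import GPy

# Toy 1-D data (hypothetical stand-in for the data used in the examples).
N, num_inducing = 400, 5
X = np.random.rand(N, 1) * 10
Y = np.sin(X) + np.random.randn(N, 1) * 0.05

# rbf + white kernel, as in sparse_GP_regression_1D; the model class is the
# renamed SparseGPRegression (formerly sparse_GP_regression), M = number of
# inducing points.
kernel = GPy.kern.rbf(1) + GPy.kern.white(1)
m = GPy.models.SparseGPRegression(X, Y, kernel, M=num_inducing)
m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=100)
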
symmetrify(Bi) - Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi) + Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi) if X_variance_new is None: Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) @@ -259,12 +259,12 @@ class sparse_GP(GPBase): :type which_parts: ('all', list of bools) :param full_cov: whether to return the folll covariance matrix, or just the diagonal :type full_cov: bool - :rtype: posterior mean, a Numpy array, Nnew x self.D + :rtype: posterior mean, a Numpy array, Nnew x self.input_dim :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise - :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D + :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim - If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. + If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. This is to allow for different normalizations of the output dimensions. """ diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 5e3eb964..60726b1d 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -5,9 +5,9 @@ import numpy as np from matplotlib import pyplot as plt import GPy -from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM from GPy.util.datasets import swiss_roll_generated from GPy.core.transformations import logexp +from GPy.models.bayesian_gplvm import BayesianGPLVM default_seed = np.random.seed(123344) @@ -20,14 +20,14 @@ def BGPLVM(seed=default_seed): X = np.random.rand(N, Q) k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N), K, D).T + Y = np.random.multivariate_normal(np.zeros(N), K, Q).T k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M) + m = GPy.models.BayesianGPLVM(Y, Q, kernel=k, M=M) m.constrain_positive('(rbf|bias|noise|white|S)') # m.constrain_fixed('S', 1) @@ -105,7 +105,7 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False): kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2)) - m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel) + m = BayesianGPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel) m.data_colors = c m.data_t = t @@ -129,7 +129,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k) Yn = Y - Y.mean(0) Yn /= Yn.std(0) - m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k) + m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, M=M, **k) m.data_labels = data['Y'][:N].argmax(axis=1) # m.constrain('variance|leng', logexp_clipped()) @@ -234,7 +234,7 @@ def bgplvm_simulation_matlab_compare(): from GPy import kern reload(mrd); reload(kern) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) - m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, + m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k, # X=mu, # X_variance=S, _debug=False) @@ -259,7 
+259,7 @@ def bgplvm_simulation(optimize='scg', Y = Ylist[0] k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) - m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True) + m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True) # m.constrain('variance|noise', logexp_clipped()) m.ensure_default_constraints() m['noise'] = Y.var() / 100. @@ -285,7 +285,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): reload(mrd); reload(kern) k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) - m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="", initz='permute', **kw) + m = mrd.MRD(Ylist, input_dim=Q, M=M, kernels=k, initx="", initz='permute', **kw) for i, Y in enumerate(Ylist): m['{}_noise'.format(i + 1)] = Y.var() / 100. @@ -297,7 +297,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): if optimize: print "Optimizing Model:" - m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4) + m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4, gtol=.05) if plot: m.plot_X_1d("MRD Latent Space 1D") m.plot_scales("MRD Scales") @@ -313,7 +313,7 @@ def brendan_faces(): Yn /= Yn.std() m = GPy.models.GPLVM(Yn, Q) - # m = GPy.models.Bayesian_GPLVM(Yn, Q, M=100) + # m = GPy.models.BayesianGPLVM(Yn, Q, M=100) # optimize m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) @@ -380,7 +380,7 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True): # M = 30 # # kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) -# m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M) +# m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, M=M) # # m.scale_factor = 100.0 # m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)') # from sklearn import cluster diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 310a0691..be3a71bd 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -10,16 +10,16 @@ import numpy as np import GPy -def toy_rbf_1d(optim_iters=100): +def toy_rbf_1d(max_nb_eval_optim=100): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d() # create simple GP model - m = GPy.models.GP_regression(data['X'],data['Y']) + m = GPy.models.GPRegression(data['X'],data['Y']) # optimize m.ensure_default_constraints() - m.optimize(max_f_eval=optim_iters) + m.optimize(max_f_eval=max_nb_eval_optim) # plot m.plot() print(m) @@ -30,7 +30,7 @@ def rogers_girolami_olympics(optim_iters=100): data = GPy.util.datasets.rogers_girolami_olympics() # create simple GP model - m = GPy.models.GP_regression(data['X'],data['Y']) + m = GPy.models.GPRegression(data['X'],data['Y']) #set the lengthscale to be something sensible (defaults to 1) m['rbf_lengthscale'] = 10 @@ -49,7 +49,7 @@ def toy_rbf_1d_50(optim_iters=100): data = GPy.util.datasets.toy_rbf_1d_50() # create simple GP model - m = GPy.models.GP_regression(data['X'],data['Y']) + m = GPy.models.GPRegression(data['X'],data['Y']) # optimize m.ensure_default_constraints() @@ -65,7 +65,7 @@ def silhouette(optim_iters=100): data = GPy.util.datasets.silhouette() # create simple GP model - m = GPy.models.GP_regression(data['X'],data['Y']) + m = GPy.models.GPRegression(data['X'],data['Y']) # optimize m.ensure_default_constraints() @@ -87,9 +87,9 @@ def coregionalisation_toy2(optim_iters=100): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) + 
GPy.kern.bias(1) - k2 = GPy.kern.coregionalise(2,1) + k2 = GPy.kern.Coregionalise(2,1) k = k1.prod(k2,tensor=True) - m = GPy.models.GP_regression(X,Y,kernel=k) + m = GPy.models.GPRegression(X,Y,kernel=k) m.constrain_fixed('.*rbf_var',1.) #m.constrain_positive('.*kappa') m.ensure_default_constraints() @@ -119,9 +119,9 @@ def coregionalisation_toy(optim_iters=100): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) - k2 = GPy.kern.coregionalise(2,2) + k2 = GPy.kern.Coregionalise(2,2) k = k1.prod(k2,tensor=True) - m = GPy.models.GP_regression(X,Y,kernel=k) + m = GPy.models.GPRegression(X,Y,kernel=k) m.constrain_fixed('.*rbf_var',1.) #m.constrain_positive('kappa') m.ensure_default_constraints() @@ -155,10 +155,10 @@ def coregionalisation_sparse(optim_iters=100): Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None])) k1 = GPy.kern.rbf(1) - k2 = GPy.kern.coregionalise(2,2) + k2 = GPy.kern.Coregionalise(2,2) k = k1.prod(k2,tensor=True) + GPy.kern.white(2,0.001) - m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z) + m = GPy.models.SparseGPRegression(X,Y,kernel=k,Z=Z) m.scale_factor = 10000. m.constrain_fixed('.*rbf_var',1.) #m.constrain_positive('kappa') @@ -181,7 +181,7 @@ def coregionalisation_sparse(optim_iters=100): return m -def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, optim_iters=100): +def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, optim_iters=300): """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher.""" # Contour over a range of length scales and signal/noise ratios. @@ -197,7 +197,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 data['Y'] = data['Y'] - np.mean(data['Y']) lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) - pb.contour(length_scales, log_SNRs, np.exp(lls), 20) + pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) ax = pb.gca() pb.xlabel('length scale') pb.ylabel('log_10 SNR') @@ -211,18 +211,20 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 optim_point_y = np.empty(2) np.random.seed(seed=seed) for i in range(0, model_restarts): - kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.)) + #kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3,1), lengthscale=np.random.uniform(5,50)) - m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern) - optim_point_x[0] = m.get('rbf_lengthscale') - optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); + m = GPy.models.GPRegression(data['X'],data['Y'], kernel=kern) + m['noise_variance'] = np.random.uniform(1e-3,1) + optim_point_x[0] = m['rbf_lengthscale'] + optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); # optimize m.ensure_default_constraints() - m.optimize(xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters) + m.optimize('scg', xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters) - optim_point_x[1] = m.get('rbf_lengthscale') - optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); + optim_point_x[1] = m['rbf_lengthscale'] + optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); pb.arrow(optim_point_x[0], optim_point_y[0], 
optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k') models.append(m) @@ -231,39 +233,32 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 ax.set_ylim(ylim) return (models, lls) -def _contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf): +def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): """Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales. :data_set: A data set from the utils.datasets director. :length_scales: a list of length scales to explore for the contour plot. :log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot. - :signal_kernel: a kernel to use for the 'signal' portion of the data.""" + :kernel: a kernel to use for the 'signal' portion of the data.""" lls = [] total_var = np.var(data['Y']) + kernel = kernel_call(1, variance=1., lengthscale=1.) + model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel) for log_SNR in log_SNRs: - SNR = 10**log_SNR + SNR = 10.**log_SNR + noise_var = total_var/(1.+SNR) + signal_var = total_var - noise_var + model.kern['.*variance'] = signal_var + model['noise_variance'] = noise_var length_scale_lls = [] + for length_scale in length_scales: - noise_var = 1. - signal_var = SNR - noise_var = noise_var/(noise_var + signal_var)*total_var - signal_var = signal_var/(noise_var + signal_var)*total_var - - signal_kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) - noise_kernel = GPy.kern.white(1, variance=noise_var) - kernel = signal_kernel + noise_kernel - K = kernel.K(data['X']) - total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0] - noise_var *= total_var - signal_var *= total_var - - kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var) - - model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel) - model.constrain_positive('') + model['.*lengthscale'] = length_scale length_scale_lls.append(model.log_likelihood()) + lls.append(length_scale_lls) + return np.array(lls) def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100): @@ -276,7 +271,7 @@ def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100): noise = GPy.kern.white(1) kernel = rbf + noise # create simple GP model - m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M) + m = GPy.models.SparseGPRegression(X, Y, kernel, M=M) m.ensure_default_constraints() @@ -296,7 +291,7 @@ def sparse_GP_regression_2D(N = 400, M = 50, optim_iters=100): kernel = rbf + noise # create simple GP model - m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M) + m = GPy.models.SparseGPRegression(X,Y,kernel, M = M) # contrain all parameters to be positive (but not inducing inputs) m.ensure_default_constraints() @@ -325,7 +320,7 @@ def uncertain_inputs_sparse_regression(optim_iters=100): k = GPy.kern.rbf(1) + GPy.kern.white(1) # create simple GP model - no input uncertainty on this one - m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z) + m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=optim_iters) m.plot(ax=axes[0]) @@ -333,7 +328,7 @@ def uncertain_inputs_sparse_regression(optim_iters=100): #the same model with uncertainty - m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z, X_variance=S) + 
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S) m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=optim_iters) m.plot(ax=axes[1]) diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py index 5d2dd41c..bd403ae8 100644 --- a/GPy/examples/tutorials.py +++ b/GPy/examples/tutorials.py @@ -19,7 +19,7 @@ def tuto_GP_regression(): kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.) - m = GPy.models.GP_regression(X,Y,kernel) + m = GPy.models.GPRegression(X, Y, kernel) print m m.plot() @@ -47,7 +47,7 @@ def tuto_GP_regression(): ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2) # create simple GP model - m = GPy.models.GP_regression(X,Y,ker) + m = GPy.models.GPRegression(X, Y, ker) # contrain all parameters to be positive m.constrain_positive('') @@ -145,7 +145,7 @@ def tuto_kernel_overview(): Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:]) # Create GP regression model - m = GPy.models.GP_regression(X,Y,Kanova) + m = GPy.models.GPRegression(X, Y, Kanova) pb.figure(figsize=(5,5)) m.plot() @@ -196,5 +196,5 @@ def model_interaction(): X = np.random.randn(20,1) Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5. k = GPy.kern.rbf(1) + GPy.kern.bias(1) - return GPy.models.GP_regression(X,Y,kernel=k) + return GPy.models.GPRegression(X, Y, kernel=k) diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py index e208392b..433d5f41 100644 --- a/GPy/inference/optimization.py +++ b/GPy/inference/optimization.py @@ -1,18 +1,16 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -import pdb import pylab as pb import datetime as dt from scipy import optimize -import numpy as np try: import rasmussens_minimize as rasm rasm_available = True except ImportError: rasm_available = False -from SCG import SCG +from scg import SCG class Optimizer(): """ @@ -51,9 +49,9 @@ class Optimizer(): start = dt.datetime.now() self.opt(**kwargs) end = dt.datetime.now() - self.time = str(end-start) + self.time = str(end - start) - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): raise NotImplementedError, "this needs to be implemented to use the optimizer class" def plot(self): @@ -78,7 +76,7 @@ class opt_tnc(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "TNC (Scipy implementation)" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): """ Run the TNC optimizer @@ -96,8 +94,8 @@ class opt_tnc(Optimizer): if self.gtol is not None: opt_dict['pgtol'] = self.gtol - opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages = self.messages, - maxfun = self.max_f_eval, **opt_dict) + opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages=self.messages, + maxfun=self.max_f_eval, **opt_dict) self.x_opt = opt_result[0] self.f_opt = f_fp(self.x_opt)[0] self.funct_eval = opt_result[1] @@ -108,7 +106,7 @@ class opt_lbfgsb(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "L-BFGS-B (Scipy implementation)" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): """ Run the optimizer @@ -130,8 +128,8 @@ class opt_lbfgsb(Optimizer): if self.gtol is not None: opt_dict['pgtol'] = self.gtol - opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint = iprint, - maxfun = self.max_f_eval, **opt_dict) + opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint, + maxfun=self.max_f_eval, **opt_dict) 
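For the dense case, the renamed entry point is GPy.models.GPRegression (formerly GP_regression). A minimal sketch of the calling convention used throughout the regression and tutorial examples above, with random toy data standing in for GPy.util.datasets.toy_rbf_1d():

import numpy as np
import GPy

# Toy data standing in for GPy.util.datasets.toy_rbf_1d().
X = np.random.rand(50, 1) * 10
Y = np.sin(X) + np.random.randn(50, 1) * 0.05

# Renamed model class; no kernel is passed here, as in the toy_rbf_1d example.
m = GPy.models.GPRegression(X, Y)
m.ensure_default_constraints()
m.optimize(max_f_eval=100)
print(m)
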
self.x_opt = opt_result[0] self.f_opt = f_fp(self.x_opt)[0] self.funct_eval = opt_result[2]['funcalls'] @@ -142,12 +140,12 @@ class opt_simplex(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "Nelder-Mead simplex routine (via Scipy)" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): """ The simplex optimizer does not require gradients. """ - statuses = ['Converged', 'Maximum number of function evaluations made','Maximum number of iterations reached'] + statuses = ['Converged', 'Maximum number of function evaluations made', 'Maximum number of iterations reached'] opt_dict = {} if self.xtol is not None: @@ -157,8 +155,8 @@ class opt_simplex(Optimizer): if self.gtol is not None: print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it" - opt_result = optimize.fmin(f, self.x_init, (), disp = self.messages, - maxfun = self.max_f_eval, full_output=True, **opt_dict) + opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages, + maxfun=self.max_f_eval, full_output=True, **opt_dict) self.x_opt = opt_result[0] self.f_opt = opt_result[1] @@ -172,7 +170,7 @@ class opt_rasm(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "Rasmussen's Conjugate Gradient" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): """ Run Rasmussen's Conjugate Gradient optimizer """ @@ -189,8 +187,8 @@ class opt_rasm(Optimizer): if self.gtol is not None: print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it" - opt_result = rasm.minimize(self.x_init, f_fp, (), messages = self.messages, - maxnumfuneval = self.max_f_eval) + opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages, + maxnumfuneval=self.max_f_eval) self.x_opt = opt_result[0] self.f_opt = opt_result[1][-1] self.funct_eval = opt_result[2] @@ -203,7 +201,7 @@ class opt_SCG(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "Scaled Conjugate Gradients" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): assert not f is None assert not fp is None opt_result = SCG(f, fp, self.x_init, display=self.messages, @@ -218,7 +216,7 @@ class opt_SCG(Optimizer): self.status = opt_result[3] def get_optimizer(f_min): - from SGD import opt_SGD + from sgd import opt_SGD optimizers = {'fmin_tnc': opt_tnc, 'simplex': opt_simplex, diff --git a/GPy/inference/SCG.py b/GPy/inference/scg.py similarity index 100% rename from GPy/inference/SCG.py rename to GPy/inference/scg.py diff --git a/GPy/inference/SGD.py b/GPy/inference/sgd.py similarity index 98% rename from GPy/inference/SGD.py rename to GPy/inference/sgd.py index c2a77e40..2df09ec8 100644 --- a/GPy/inference/SGD.py +++ b/GPy/inference/sgd.py @@ -292,8 +292,8 @@ class opt_SGD(Optimizer): NLL = [] import pylab as plt for count, j in enumerate(features): - self.model.D = len(j) - self.model.likelihood.D = len(j) + self.model.input_dim = len(j) + self.model.likelihood.input_dim = len(j) self.model.likelihood.set_data(Y[:, j]) # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision @@ -334,9 +334,9 @@ class opt_SGD(Optimizer): self.f_opt = np.mean(NLL) self.model.N = N self.model.X = X - self.model.D = D + self.model.input_dim = D self.model.likelihood.N = N - self.model.likelihood.D = D + self.model.likelihood.input_dim = D self.model.likelihood.Y = Y sigma = self.model.likelihood._variance self.model.likelihood._variance = None # invalidate cache diff 
--git a/GPy/kern/Brownian.py b/GPy/kern/Brownian.py index c5b19653..fe60ec59 100644 --- a/GPy/kern/Brownian.py +++ b/GPy/kern/Brownian.py @@ -13,14 +13,14 @@ class Brownian(kernpart): """ Brownian Motion kernel. - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variance: :type variance: float """ - def __init__(self,D,variance=1.): - self.D = D - assert self.D==1, "Brownian motion in 1D only" + def __init__(self,input_dim,variance=1.): + self.input_dim = input_dim + assert self.input_dim==1, "Brownian motion in 1D only" self.Nparam = 1. self.name = 'Brownian' self._set_params(np.array([variance]).flatten()) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 81dce75f..a71d38f4 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs try: from constructors import rbf_sympy, sympykern # these depend on sympy except: diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py index 09f0afa9..9defbf95 100644 --- a/GPy/kern/bias.py +++ b/GPy/kern/bias.py @@ -7,14 +7,14 @@ import numpy as np import hashlib class bias(kernpart): - def __init__(self,D,variance=1.): + def __init__(self,input_dim,variance=1.): """ - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variance: the variance of the kernel :type variance: float """ - self.D = D + self.input_dim = input_dim self.Nparam = 1 self.name = 'bias' self._set_params(np.array([variance]).flatten()) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index f2e4b57b..a6ed1145 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -21,7 +21,7 @@ from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part from prod import prod as prodpart from symmetric import symmetric as symmetric_part -from coregionalise import coregionalise as coregionalise_part +from coregionalise import Coregionalise as coregionalise_part from rational_quadratic import rational_quadratic as rational_quadraticpart from rbfcos import rbfcos as rbfcospart from independent_outputs import independent_outputs as independent_output_part @@ -33,8 +33,8 @@ def rbf(D,variance=1., lengthscale=None,ARD=False): """ Construct an RBF kernel - :param D: dimensionality of the kernel, obligatory - :type D: int + :param input_dim: dimensionality of the kernel, obligatory + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -51,7 +51,7 @@ def linear(D,variances=None,ARD=False): Arguments --------- - D (int), obligatory + input_dimD (int), obligatory variances (np.ndarray) ARD (boolean) """ @@ -64,7 +64,7 @@ def white(D,variance=1.): Arguments --------- - D (int), obligatory + input_dimD (int), obligatory variance 
(float) """ part = whitepart(D,variance) @@ -74,8 +74,8 @@ def exponential(D,variance=1., lengthscale=None, ARD=False): """ Construct an exponential kernel - :param D: dimensionality of the kernel, obligatory - :type D: int + :param input_dim: dimensionality of the kernel, obligatory + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -90,8 +90,8 @@ def Matern32(D,variance=1., lengthscale=None, ARD=False): """ Construct a Matern 3/2 kernel. - :param D: dimensionality of the kernel, obligatory - :type D: int + :param input_dim: dimensionality of the kernel, obligatory + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -106,8 +106,8 @@ def Matern52(D,variance=1., lengthscale=None, ARD=False): """ Construct a Matern 5/2 kernel. - :param D: dimensionality of the kernel, obligatory - :type D: int + :param input_dim: dimensionality of the kernel, obligatory + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -124,7 +124,7 @@ def bias(D,variance=1.): Arguments --------- - D (int), obligatory + input_dim (int), obligatory variance (float) """ part = biaspart(D,variance) @@ -133,7 +133,7 @@ def bias(D,variance=1.): def finite_dimensional(D,F,G,variances=1.,weights=None): """ Construct a finite dimensional kernel. - D: int - the number of input dimensions + input_dim: int - the number of input dimensions F: np.array of functions with shape (n,) - the n basis functions G: np.array with shape (n,n) - the Gram matrix associated to F variances : np.ndarray with shape (n,) @@ -145,8 +145,8 @@ def spline(D,variance=1.): """ Construct a spline kernel. - :param D: Dimensionality of the kernel - :type D: int + :param input_dim: Dimensionality of the kernel + :type input_dim: int :param variance: the variance of the kernel :type variance: float """ @@ -157,8 +157,8 @@ def Brownian(D,variance=1.): """ Construct a Brownian motion kernel. - :param D: Dimensionality of the kernel - :type D: int + :param input_dim: Dimensionality of the kernel + :type input_dim: int :param variance: the variance of the kernel :type variance: float """ @@ -204,8 +204,8 @@ def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_fre """ Construct an periodic exponential kernel - :param D: dimensionality, only defined for D=1 - :type D: int + :param input_dim: dimensionality, only defined for input_dim=1 + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -222,8 +222,8 @@ def periodic_Matern32(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, """ Construct a periodic Matern 3/2 kernel. - :param D: dimensionality, only defined for D=1 - :type D: int + :param input_dim: dimensionality, only defined for input_dim=1 + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -240,8 +240,8 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, """ Construct a periodic Matern 5/2 kernel. 
- :param D: dimensionality, only defined for D=1 - :type D: int + :param input_dim: dimensionality, only defined for input_dim=1 + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -256,14 +256,14 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, def prod(k1,k2,tensor=False): """ - Construct a product kernel over D from two kernels over D + Construct a product kernel over input_dim from two kernels over input_dim :param k1, k2: the kernels to multiply :type k1, k2: kernpart :rtype: kernel object """ part = prodpart(k1,k2,tensor) - return kern(part.D, [part]) + return kern(part.input_dim, [part]) def symmetric(k): """ @@ -273,7 +273,7 @@ def symmetric(k): k_.parts = [symmetric_part(p) for p in k.parts] return k_ -def coregionalise(Nout,R=1, W=None, kappa=None): +def Coregionalise(Nout,R=1, W=None, kappa=None): p = coregionalise_part(Nout,R,W,kappa) return kern(1,[p]) @@ -282,8 +282,8 @@ def rational_quadratic(D,variance=1., lengthscale=1., power=1.): """ Construct rational quadratic kernel. - :param D: the number of input dimensions - :type D: int (D=1 is the only value currently supported) + :param input_dim: the number of input dimensions + :type input_dim: int (input_dim=1 is the only value currently supported) :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the lengthscale :math:`\ell` @@ -300,7 +300,7 @@ def fixed(D, K, variance=1.): Arguments --------- - D (int), obligatory + input_dim (int), obligatory K (np.array), obligatory variance (float) """ @@ -321,6 +321,6 @@ def independent_outputs(k): for sl in k.input_slices: assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)" parts = [independent_output_part(p) for p in k.parts] - return kern(k.D+1,parts) + return kern(k.input_dim+1,parts) diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py index a4d22c2d..7fe922fd 100644 --- a/GPy/kern/coregionalise.py +++ b/GPy/kern/coregionalise.py @@ -7,12 +7,12 @@ from GPy.util.linalg import mdot, pdinv import pdb from scipy import weave -class coregionalise(kernpart): +class Coregionalise(kernpart): """ Kernel for Intrinsic Corregionalization Models """ def __init__(self,Nout,R=1, W=None, kappa=None): - self.D = 1 + self.input_dim = 1 self.name = 'coregion' self.Nout = Nout self.R = R diff --git a/GPy/kern/kern.py b/GPy/kern/kern.py index 0d3b4ccf..05af1997 100644 --- a/GPy/kern/kern.py +++ b/GPy/kern/kern.py @@ -11,14 +11,14 @@ from prod import prod from ..util.linalg import symmetrify class kern(parameterised): - def __init__(self, D, parts=[], input_slices=None): + def __init__(self, input_dim, parts=[], input_slices=None): """ This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where. The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used. 
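The kern class described just above combines its parts additively, and each part now takes input_dim as its first constructor argument in place of D. A minimal sketch of building such a composite kernel and evaluating it, with illustrative toy inputs only:

import numpy as np
import GPy

input_dim = 2
# Additive combination of parts, each constructed with input_dim first.
k = GPy.kern.rbf(input_dim, variance=1., lengthscale=1.) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.001)

X = np.random.rand(10, input_dim)
K = k.K(X)          # 10 x 10 covariance matrix
Kdiag = k.Kdiag(X)  # its diagonal, length 10
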
- :param D: The dimensioality of the kernel's input space - :type D: int + :param input_dim: The dimensioality of the kernel's input space + :type input_dim: int :param parts: the 'parts' (PD functions) of the kernel :type parts: list of kernpart objects :param input_slices: the slices on the inputs which apply to each kernel @@ -29,7 +29,7 @@ class kern(parameterised): self.Nparts = len(parts) self.Nparam = sum([p.Nparam for p in self.parts]) - self.D = D + self.input_dim = input_dim # deal with input_slices if input_slices is None: @@ -96,10 +96,10 @@ class kern(parameterised): :type other: GPy.kern """ if tensor: - D = self.D + other.D - self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices] - other_input_indices = [sl.indices(other.D) for sl in other.input_slices] - other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices] + D = self.input_dim + other.input_dim + self_input_slices = [slice(*sl.indices(self.input_dim)) for sl in self.input_slices] + other_input_indices = [sl.indices(other.input_dim) for sl in other.input_slices] + other_input_slices = [slice(i[0] + self.input_dim, i[1] + self.input_dim, i[2]) for i in other_input_indices] newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices) @@ -111,8 +111,8 @@ class kern(parameterised): newkern.constraints = self.constraints + other.constraints newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] else: - assert self.D == other.D - newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices) + assert self.input_dim == other.input_dim + newkern = kern(self.input_dim, self.parts + other.parts, self.input_slices + other.input_slices) # transfer constraints: newkern.constrained_indices = self.constrained_indices + [i + self.Nparam for i in other.constrained_indices] newkern.constraints = self.constraints + other.constraints @@ -138,16 +138,16 @@ class kern(parameterised): slices = [] for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices): - s1, s2 = [False] * K1.D, [False] * K2.D + s1, s2 = [False] * K1.input_dim, [False] * K2.input_dim s1[sl1], s2[sl2] = [True], [True] slices += [s1 + s2] newkernparts = [prod(k1, k2, tensor) for k1, k2 in itertools.product(K1.parts, K2.parts)] if tensor: - newkern = kern(K1.D + K2.D, newkernparts, slices) + newkern = kern(K1.input_dim + K2.input_dim, newkernparts, slices) else: - newkern = kern(K1.D, newkernparts, slices) + newkern = kern(K1.input_dim, newkernparts, slices) newkern._follow_constrains(K1, K2) return newkern @@ -211,7 +211,7 @@ class kern(parameterised): def K(self, X, X2=None, which_parts='all'): if which_parts == 'all': which_parts = [True] * self.Nparts - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] @@ -225,11 +225,11 @@ class kern(parameterised): :param dL_dK: An array of dL_dK derivaties, dL_dK :type dL_dK: Np.ndarray (N x M) :param X: Observed data inputs - :type X: np.ndarray (N x D) + :type X: np.ndarray (N x input_dim) :param X2: Observed dara inputs (optional, defaults to X) - :type X2: np.ndarray (M x D) + :type X2: np.ndarray (M x input_dim) """ - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim target = np.zeros(self.Nparam) if X2 is None: [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, 
i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] @@ -251,20 +251,20 @@ class kern(parameterised): def Kdiag(self, X, which_parts='all'): if which_parts == 'all': which_parts = [True] * self.Nparts - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim target = np.zeros(X.shape[0]) [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on] return target def dKdiag_dtheta(self, dL_dKdiag, X): - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim assert dL_dKdiag.size == X.shape[0] target = np.zeros(self.Nparam) [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] return self._transform_gradients(target) def dKdiag_dX(self, dL_dKdiag, X): - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim target = np.zeros_like(X) [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] return target @@ -386,7 +386,7 @@ class kern(parameterised): def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs): if which_parts == 'all': which_parts = [True] * self.Nparts - if self.D == 1: + if self.input_dim == 1: if x is None: x = np.zeros((1, 1)) else: @@ -408,7 +408,7 @@ class kern(parameterised): pb.xlabel("x") pb.ylabel("k(x,%0.1f)" % x) - elif self.D == 2: + elif self.input_dim == 2: if x is None: x = np.zeros((1, 2)) else: diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py index 7de150e9..a4dfdd92 100644 --- a/GPy/kern/kernpart.py +++ b/GPy/kern/kernpart.py @@ -3,16 +3,16 @@ class kernpart(object): - def __init__(self,D): + def __init__(self,input_dim): """ The base class for a kernpart: a positive definite function which forms part of a kernel - :param D: the number of input dimensions to the function - :type D: int + :param input_dim: the number of input dimensions to the function + :type input_dim: int Do not instantiate. """ - self.D = D + self.input_dim = input_dim self.Nparam = 1 self.name = 'unnamed' diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 8744eda0..425b81bb 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -13,10 +13,10 @@ class linear(kernpart): .. math:: - k(x,y) = \sum_{i=1}^D \sigma^2_i x_iy_i + k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variances: the vector of variances :math:`\sigma^2_i` :type variances: array or list of the appropriate size (or float if there is only one variance parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. 
@@ -24,8 +24,8 @@ class linear(kernpart): :rtype: kernel object """ - def __init__(self, D, variances=None, ARD=False): - self.D = D + def __init__(self, input_dim, variances=None, ARD=False): + self.input_dim = input_dim self.ARD = ARD if ARD == False: self.Nparam = 1 @@ -37,13 +37,13 @@ class linear(kernpart): variances = np.ones(1) self._Xcache, self._X2cache = np.empty(shape=(2,)) else: - self.Nparam = self.D + self.Nparam = self.input_dim self.name = 'linear' if variances is not None: variances = np.asarray(variances) - assert variances.size == self.D, "bad number of lengthscales" + assert variances.size == self.input_dim, "bad number of lengthscales" else: - variances = np.ones(self.D) + variances = np.ones(self.input_dim) self._set_params(variances.flatten()) # initialize cache @@ -82,7 +82,7 @@ class linear(kernpart): def dK_dtheta(self, dL_dK, X, X2, target): if self.ARD: if X2 is None: - [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)] + [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)] else: product = X[:, None, :] * X2[None, :, :] target += (dL_dK[:, :, None] * product).sum(0).sum(0) @@ -153,7 +153,7 @@ class linear(kernpart): # psi2_real[n, m, m_prime] = np.dot(tmp, ( # self._Z[m_prime:m_prime + 1] * self.variances).T) # mu2_S = (self._mu[:, None, :] * self._mu[:, :, None]) -# mu2_S[:, np.arange(self.D), np.arange(self.D)] += self._S +# mu2_S[:, np.arange(self.input_dim), np.arange(self.input_dim)] += self._S # psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1) # psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1) # psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T diff --git a/GPy/kern/prod.py b/GPy/kern/prod.py index f61906bb..4d1fe17f 100644 --- a/GPy/kern/prod.py +++ b/GPy/kern/prod.py @@ -22,14 +22,14 @@ class prod(kernpart): self.k1 = k1 self.k2 = k2 if tensor: - self.D = k1.D + k2.D - self.slice1 = slice(0,self.k1.D) - self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D) + self.input_dim = k1.input_dim + k2.input_dim + self.slice1 = slice(0,self.k1.input_dim) + self.slice2 = slice(self.k1.input_dim,self.k1.input_dim+self.k2.input_dim) else: - assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension." - self.D = k1.D - self.slice1 = slice(0,self.D) - self.slice2 = slice(0,self.D) + assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to sum don't have the same dimension." + self.input_dim = k1.input_dim + self.slice1 = slice(0,self.input_dim) + self.slice2 = slice(0,self.input_dim) self._X, self._X2, self._params = np.empty(shape=(3,1)) self._set_params(np.hstack((k1._get_params(),k2._get_params()))) diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index a89b7d45..39cf878e 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -18,8 +18,8 @@ class rbf(kernpart): where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the vector of lengthscale of the kernel @@ -31,8 +31,8 @@ class rbf(kernpart): .. 
Note: this object implements both the ARD and 'spherical' version of the function """ - def __init__(self,D,variance=1.,lengthscale=None,ARD=False): - self.D = D + def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False): + self.input_dim = input_dim self.name = 'rbf' self.ARD = ARD if not ARD: @@ -43,12 +43,12 @@ class rbf(kernpart): else: lengthscale = np.ones(1) else: - self.Nparam = self.D + 1 + self.Nparam = self.input_dim + 1 if lengthscale is not None: lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.D, "bad number of lengthscales" + assert lengthscale.size == self.input_dim, "bad number of lengthscales" else: - lengthscale = np.ones(self.D) + lengthscale = np.ones(self.input_dim) self._set_params(np.hstack((variance,lengthscale.flatten()))) @@ -100,7 +100,7 @@ class rbf(kernpart): code = """ int q,i,j; double tmp; - for(q=0; q self.N: self.YYT = np.dot(self.Y, self.Y.T) @@ -52,7 +52,7 @@ class Gaussian(likelihood): def _set_params(self, x): x = np.float64(x) - if self._variance != x: + if np.all(self._variance != x): if x == 0.: self.precision = None self.V = None @@ -68,9 +68,9 @@ class Gaussian(likelihood): """ mean = mu * self._scale + self._offset if full_cov: - if self.D > 1: + if self.input_dim > 1: raise NotImplementedError, "TODO" - # Note. for D>1, we need to re-normalise all the outputs independently. + # Note. for input_dim>1, we need to re-normalise all the outputs independently. # This will mess up computations of diag(true_var), below. # note that the upper, lower quantiles should be the same shape as mean # Augment the output variance with the likelihood variance and rescale. diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 00100d17..c801b9a9 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -10,12 +10,12 @@ from ..util.plot import gpplot from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf import link_functions -class likelihood_function(object): +class LikelihoodFunction(object): """ Likelihood class for doing Expectation propagation :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the likelihood_function used + ..Note:: Y values allowed depend on the LikelihoodFunction used """ def __init__(self,link): if link == self._analytical: @@ -69,7 +69,7 @@ class likelihood_function(object): sigma2_hat = m2 - mu_hat**2 # Second central moment return float(Z_hat), float(mu_hat), float(sigma2_hat) -class binomial(likelihood_function): +class Binomial(LikelihoodFunction): """ Probit likelihood Y is expected to take values in {-1,1} @@ -82,7 +82,7 @@ class binomial(likelihood_function): self._analytical = link_functions.probit if not link: link = self._analytical - super(binomial, self).__init__(link) + super(Binomial, self).__init__(link) def _distribution(self,gp,obs): pass @@ -134,7 +134,7 @@ class binomial(likelihood_function): p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var)) return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var -class Poisson(likelihood_function): +class Poisson(LikelihoodFunction): """ Poisson likelihood Y is expected to take values in {0,1,2,...} diff --git a/GPy/models/FITC.py b/GPy/models/FITC.py deleted file mode 100644 index e8078780..00000000 --- a/GPy/models/FITC.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
-# Licensed under the BSD 3-clause license (see LICENSE.txt) - -import numpy as np -import pylab as pb -from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv -from ..util.plot import gpplot -from .. import kern -from scipy import stats, linalg -from ..core import sparse_GP - -def backsub_both_sides(L,X): - """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" - tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1) - return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T - -class FITC(sparse_GP): - - def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): - super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X) - - def update_likelihood_approximation(self): - """ - Approximates a non-gaussian likelihood using Expectation Propagation - - For a Gaussian (or direct: TODO) likelihood, no iteration is required: - this function does nothing - - Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP. - The true precison is now 'true_precision' not 'precision'. - """ - if self.has_uncertain_inputs: - raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" - else: - self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) - self._set_params(self._get_params()) # update the GP - - def _computations(self): - - #factor Kmm - self.Lm = jitchol(self.Kmm) - self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1) - Lmipsi1 = np.dot(self.Lmi,self.psi1) - self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy() - self.Diag0 = self.psi0 - np.diag(self.Qnn) - self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision - self.V_star = self.beta_star * self.likelihood.Y - - # The rather complex computations of self.A - if self.has_uncertain_inputs: - raise NotImplementedError - else: - if self.likelihood.is_heteroscedastic: - assert self.likelihood.D == 1 - tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N))) - tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) - self.A = tdot(tmp) - - # factor B - self.B = np.eye(self.M) + self.A - self.LB = jitchol(self.B) - self.LBi = chol_inv(self.LB) - self.psi1V = np.dot(self.psi1, self.V_star) - - Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0) - self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0) - - Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1) - b_psi1_Ki = self.beta_star * Kmmipsi1.T - Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki) - Kmmi = np.dot(self.Lmi.T,self.Lmi) - LBiLmi = np.dot(self.LBi,self.Lmi) - LBL_inv = np.dot(LBiLmi.T,LBiLmi) - VVT = np.outer(self.V_star,self.V_star) - VV_p_Ki = np.dot(VVT,Kmmipsi1.T) - Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki) - psi1beta = self.psi1*self.beta_star.T - H = self.Kmm + mdot(self.psi1,psi1beta.T) - LH = jitchol(H) - LHi = chol_inv(LH) - Hi = np.dot(LHi.T,LHi) - - betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T) - alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None] - gamma_1 = mdot(VVT,self.psi1.T,Hi) - pHip = mdot(self.psi1.T,Hi,self.psi1) - gamma_2 = mdot(self.beta_star*pHip,self.V_star) - gamma_3 = self.V_star * gamma_2 - - self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star) - self._dL_dpsi0 += .5 * self.V_star**2 #dA_psi0: yT*beta_star*y - self._dL_dpsi0 += .5 
*alpha #dC_dpsi0 - self._dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0 - - self._dL_dpsi1 = b_psi1_Ki.copy() #dA_dpsi1: logdet(self.beta_star) - self._dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1 - self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1 - - self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star) - self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm - self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm - - self._dpsi1_dtheta = 0 - self._dpsi1_dX = 0 - self._dKmm_dtheta = 0 - self._dKmm_dX = 0 - - self._dpsi1_dX_jkj = 0 - self._dpsi1_dtheta_jkj = 0 - - for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3): - K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T) - - #Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1 - _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:] - - #Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm - _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm - - self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z) - self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z) - - self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z) - self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:]) - - # the partial derivative vector for the likelihood - if self.likelihood.Nparams == 0: - # save computation here. - self.partial_for_likelihood = None - elif self.likelihood.is_heteroscedastic: - raise NotImplementedError, "heteroscedatic derivates not implemented" - else: - # likelihood is not heterscedatic - dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star) - Lmi_psi1 = mdot(self.Lmi,self.psi1) - LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1) - aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1) - aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1) - aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V) - - dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise) - dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) - dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) - - dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T) - alpha = mdot(LBiLmipsi1,self.V_star) - alpha_ = mdot(LBiLmipsi1.T,alpha) - dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise ) - - dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y) - dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star) - dD_dnoise = dD_dnoise_1 + dD_dnoise_2 - - self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise - - def log_likelihood(self): - """ Compute the (lower bound on the) log marginal likelihood """ - A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) - C = -self.D * (np.sum(np.log(np.diag(self.LB)))) - D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) - return A + C + D - - def _log_likelihood_gradients(self): - pass - return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), 
self.likelihood._gradients(partial=self.partial_for_likelihood))) - - def dL_dtheta(self): - if self.has_uncertain_inputs: - raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" - else: - dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X) - dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z) - dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z) - dL_dtheta += self._dKmm_dtheta - dL_dtheta += self._dpsi1_dtheta - return dL_dtheta - - def dL_dZ(self): - if self.has_uncertain_inputs: - raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" - else: - dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X) - dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z) - dL_dZ += self._dpsi1_dX - dL_dZ += self._dKmm_dX - return dL_dZ - - def _raw_predict(self, Xnew, which_parts, full_cov=False): - - if self.likelihood.is_heteroscedastic: - Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten()) - self.Diag = self.Diag0 * Iplus_Dprod_i - self.P = Iplus_Dprod_i[:,None] * self.psi1.T - self.RPT0 = np.dot(self.Lmi,self.psi1) - self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) - self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) - self.w = self.Diag * self.likelihood.v_tilde - self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde)) - self.mu = self.w + np.dot(self.P,self.Gamma) - - """ - Make a prediction for the generalized FITC model - - Arguments - --------- - X : Input prediction data - Nx1 numpy array (floats) - """ - # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) - - # Ci = I + (RPT0)Di(RPT0).T - # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T - # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T - # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T - # = I - V.T * V - U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) - V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) - C = np.eye(self.M) - np.dot(V.T,V) - mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) - #self.C = C - #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T - #self.mu_u = mu_u - #self.U = U - # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T) - mu_H = np.dot(mu_u,self.mu) - self.mu_H = mu_H - Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T)) - # q(f_star|y) = N(f_star|mu_star,sigma2_star) - Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) - KR0T = np.dot(Kx.T,self.Lmi.T) - mu_star = np.dot(KR0T,mu_H) - if full_cov: - Kxx = self.kern.K(Xnew,which_parts=which_parts) - var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) - else: - Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) - var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None] - return mu_star[:,None],var - else: - raise NotImplementedError, "homoscedastic fitc not implemented" - """ - Kx = self.kern.K(self.Z, Xnew) - mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) - if full_cov: - Kxx = self.kern.K(Xnew) - var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting - else: - Kxx = self.kern.Kdiag(Xnew) - var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) - return mu,var[:,None] - """ diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index eeca5993..281d8a34 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -1,14 +1,11 @@ # Copyright (c) 2012, GPy authors (see 
AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) - -from GP_regression import GP_regression -from GP_classification import GP_classification -from sparse_GP_regression import sparse_GP_regression -from sparse_GP_classification import sparse_GP_classification -from FITC_classification import FITC_classification -from GPLVM import GPLVM -from warped_GP import warpedGP -from sparse_GPLVM import sparse_GPLVM -from Bayesian_GPLVM import Bayesian_GPLVM +from gp_regression import GPRegression +from sparse_gp_regression import SparseGPRegression +from sparse_gp_classification import SparseGPClassification +from fitc_classification import FITCClassification +from gplvm import GPLVM +from warped_gp import WarpedGP +from bayesian_gplvm import BayesianGPLVM from mrd import MRD diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/bayesian_gplvm.py similarity index 92% rename from GPy/models/Bayesian_GPLVM.py rename to GPy/models/bayesian_gplvm.py index e69c4840..7008e9a3 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/bayesian_gplvm.py @@ -2,21 +2,16 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -import pylab as pb -import sys, pdb -from GPLVM import GPLVM -from ..core import sparse_GP -from GPy.util.linalg import pdinv +from ..core import SparseGP from ..likelihoods import Gaussian from .. import kern -from numpy.linalg.linalg import LinAlgError import itertools from matplotlib.colors import colorConverter -from matplotlib.figure import SubplotParams from GPy.inference.optimization import SCG from GPy.util import plot_latent +from GPy.models.gplvm import GPLVM -class Bayesian_GPLVM(sparse_GP, GPLVM): +class BayesianGPLVM(SparseGP, GPLVM): """ Bayesian Gaussian Process Latent Variable Model @@ -64,7 +59,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): self._savedpsiKmm = [] self._savedABCD = [] - sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) + SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) self._set_params(self._get_params()) @property @@ -80,19 +75,19 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def _get_param_names(self): X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) - return (X_names + S_names + sparse_GP._get_param_names(self)) + return (X_names + S_names + SparseGP._get_param_names(self)) def _get_params(self): """ Horizontally stacks the parameters in order to present them to the optimizer. 
- The resulting 1-D array has this structure: + The resulting 1-input_dim array has this structure: =============================================================== | mu | S | Z | theta | beta | =============================================================== """ - x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self))) + x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self))) return x def _clipped(self, x): @@ -104,7 +99,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): N, input_dim = self.N, self.input_dim self.X = x[:self.X.size].reshape(N, input_dim).copy() self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() - sparse_GP._set_params(self, x[(2 * N * input_dim):]) + SparseGP._set_params(self, x[(2 * N * input_dim):]) # self.oldps = x # except (LinAlgError, FloatingPointError, ZeroDivisionError): # print "\rWARNING: Caught LinAlgError, continueing without setting " @@ -134,7 +129,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N def log_likelihood(self): - ll = sparse_GP.log_likelihood(self) + ll = SparseGP.log_likelihood(self) kl = self.KL_divergence() # if ll < -2E4: @@ -151,14 +146,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]]) # sf2 = self.scale_factor ** 2 if self.likelihood.is_heteroscedastic: - A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) -# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) - B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) + A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) +# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) + B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) else: - A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT -# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) - B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) - C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) + A = -0.5 * self.N * self.input_dim * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT +# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) + B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) + C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) self._savedABCD.append([self.f_call, A, B, C, D]) @@ -181,7 +176,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): # d_dS = (dL_dS).flatten() # ======================== self.dbound_dmuS = np.hstack((d_dmu, d_dS)) - self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self) + self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self) return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))) def plot_latent(self, *args, **kwargs): @@ -200,7 +195,7 
@@ class Bayesian_GPLVM(sparse_GP, GPLVM): means = np.zeros((N_test, input_dim)) covars = np.zeros((N_test, input_dim)) - dpsi0 = -0.5 * self.D * self.likelihood.precision + dpsi0 = -0.5 * self.input_dim * self.likelihood.precision dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods V = self.likelihood.precision * Y dpsi1 = np.dot(self.Cpsi1V, V.T) @@ -263,7 +258,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def __getstate__(self): return (self.likelihood, self.input_dim, self.X, self.X_variance, - self.init, self.M, self.Z, self.kern, + self.init, self.num_inducing, self.Z, self.kern, self.oldpsave, self._debug) def __setstate__(self, state): @@ -274,8 +269,8 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): X = x[start:end].reshape(self.N, self.input_dim) start, end = end, end + self.X_variance.size X_v = x[start:end].reshape(self.N, self.input_dim) - start, end = end, end + (self.M * self.input_dim) - Z = x[start:end].reshape(self.M, self.input_dim) + start, end = end, end + (self.num_inducing * self.input_dim) + Z = x[start:end].reshape(self.num_inducing, self.input_dim) start, end = end, end + self.input_dim theta = x[start:] return X, X_v, Z, theta @@ -353,12 +348,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) fig = figs[-1] ax8 = fig.add_subplot(121) - ax8.text(.5, .5, r"${\mathbf{A,B,C,D}}$", color='k', alpha=.5, transform=ax8.transAxes, + ax8.text(.5, .5, r"${\mathbf{A,B,C,input_dim}}$", color='k', alpha=.5, transform=ax8.transAxes, ha='center', va='center') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 2], label='B') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D') + ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='input_dim') ax8.legend() figs[-1].canvas.draw() figs[-1].tight_layout(rect=(.15, 0, 1, .86)) diff --git a/GPy/models/fitc.py b/GPy/models/fitc.py new file mode 100644 index 00000000..785ea46c --- /dev/null +++ b/GPy/models/fitc.py @@ -0,0 +1,252 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import pylab as pb +from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify, pdinv +from ..util.plot import gpplot +from .. import kern +from scipy import stats, linalg +from GPy.core.sparse_gp import SparseGP + +def backsub_both_sides(L, X): + """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" + tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1) + return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T + +class FITC(SparseGP): + + def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): + super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X) + + def update_likelihood_approximation(self): + """ + Approximates a non-gaussian likelihood using Expectation Propagation + + For a Gaussian (or direct: TODO) likelihood, no iteration is required: + this function does nothing + + Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in SparseGP. + The true precison is now 'true_precision' not 'precision'. 
+ """ + if self.has_uncertain_inputs: + raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" + else: + self.likelihood.fit_FITC(self.Kmm, self.psi1, self.psi0) + self._set_params(self._get_params()) # update the GP + + def _computations(self): + + # factor Kmm + self.Lm = jitchol(self.Kmm) + self.Lmi, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.eye(self.num_inducing), lower=1) + Lmipsi1 = np.dot(self.Lmi, self.psi1) + self.Qnn = np.dot(Lmipsi1.T, Lmipsi1).copy() + self.Diag0 = self.psi0 - np.diag(self.Qnn) + self.beta_star = self.likelihood.precision / (1. + self.likelihood.precision * self.Diag0[:, None]) # Includes Diag0 in the precision + self.V_star = self.beta_star * self.likelihood.Y + + # The rather complex computations of self.A + if self.has_uncertain_inputs: + raise NotImplementedError + else: + if self.likelihood.is_heteroscedastic: + assert self.likelihood.input_dim == 1 + tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N))) + tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) + self.A = tdot(tmp) + + # factor B + self.B = np.eye(self.num_inducing) + self.A + self.LB = jitchol(self.B) + self.LBi = chol_inv(self.LB) + self.psi1V = np.dot(self.psi1, self.V_star) + + Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0) + self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0) + + Kmmipsi1 = np.dot(self.Lmi.T, Lmipsi1) + b_psi1_Ki = self.beta_star * Kmmipsi1.T + Ki_pbp_Ki = np.dot(Kmmipsi1, b_psi1_Ki) + Kmmi = np.dot(self.Lmi.T, self.Lmi) + LBiLmi = np.dot(self.LBi, self.Lmi) + LBL_inv = np.dot(LBiLmi.T, LBiLmi) + VVT = np.outer(self.V_star, self.V_star) + VV_p_Ki = np.dot(VVT, Kmmipsi1.T) + Ki_pVVp_Ki = np.dot(Kmmipsi1, VV_p_Ki) + psi1beta = self.psi1 * self.beta_star.T + H = self.Kmm + mdot(self.psi1, psi1beta.T) + LH = jitchol(H) + LHi = chol_inv(LH) + Hi = np.dot(LHi.T, LHi) + + betapsi1TLmiLBi = np.dot(psi1beta.T, LBiLmi.T) + alpha = np.array([np.dot(a.T, a) for a in betapsi1TLmiLBi])[:, None] + gamma_1 = mdot(VVT, self.psi1.T, Hi) + pHip = mdot(self.psi1.T, Hi, self.psi1) + gamma_2 = mdot(self.beta_star * pHip, self.V_star) + gamma_3 = self.V_star * gamma_2 + + self._dL_dpsi0 = -0.5 * self.beta_star # dA_dpsi0: logdet(self.beta_star) + self._dL_dpsi0 += .5 * self.V_star ** 2 # dA_psi0: yT*beta_star*y + self._dL_dpsi0 += .5 * alpha # dC_dpsi0 + self._dL_dpsi0 += 0.5 * mdot(self.beta_star * pHip, self.V_star) ** 2 - self.V_star * mdot(self.V_star.T, pHip * self.beta_star).T # dD_dpsi0 + + self._dL_dpsi1 = b_psi1_Ki.copy() # dA_dpsi1: logdet(self.beta_star) + self._dL_dpsi1 += -np.dot(psi1beta.T, LBL_inv) # dC_dpsi1 + self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T, Hi, self.psi1, gamma_1) # dD_dpsi1 + + self._dL_dKmm = -0.5 * np.dot(Kmmipsi1, b_psi1_Ki) # dA_dKmm: logdet(self.beta_star) + self._dL_dKmm += .5 * (LBL_inv - Kmmi) + mdot(LBL_inv, psi1beta, Kmmipsi1.T) # dC_dKmm + self._dL_dKmm += -.5 * mdot(Hi, self.psi1, gamma_1) # dD_dKmm + + self._dpsi1_dtheta = 0 + self._dpsi1_dX = 0 + self._dKmm_dtheta = 0 + self._dKmm_dX = 0 + + self._dpsi1_dX_jkj = 0 + self._dpsi1_dtheta_jkj = 0 + + for i, V_n, alpha_n, gamma_n, gamma_k in zip(range(self.N), self.V_star, alpha, gamma_2, gamma_3): + K_pp_K = np.dot(Kmmipsi1[:, i:(i + 1)], Kmmipsi1[:, i:(i + 1)].T) + + # Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1 + _dpsi1 = (-V_n ** 2 - alpha_n + 2.*gamma_k - gamma_n ** 2) * 
Kmmipsi1.T[i:(i + 1), :] + + # Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm + _dKmm = .5 * (V_n ** 2 + alpha_n + gamma_n ** 2 - 2.*gamma_k) * K_pp_K # Diag_dD_dKmm + + self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1, self.X[i:i + 1, :], self.Z) + self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm, self.Z) + + self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm , self.Z) + self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T, self.Z, self.X[i:i + 1, :]) + + # the partial derivative vector for the likelihood + if self.likelihood.Nparams == 0: + # save computation here. + self.partial_for_likelihood = None + elif self.likelihood.is_heteroscedastic: + raise NotImplementedError, "heteroscedatic derivates not implemented" + else: + # likelihood is not heterscedatic + dbstar_dnoise = self.likelihood.precision * (self.beta_star ** 2 * self.Diag0[:, None] - self.beta_star) + Lmi_psi1 = mdot(self.Lmi, self.psi1) + LBiLmipsi1 = np.dot(self.LBi, Lmi_psi1) + aux_0 = np.dot(self._LBi_Lmi_psi1V.T, LBiLmipsi1) + aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T, LBiLmipsi1) + aux_2 = np.dot(LBiLmipsi1.T, self._LBi_Lmi_psi1V) + + dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise / self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y ** 2 * dbstar_dnoise) + dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T, self.LBi, Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) + dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T, self.LBi, Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) + + dD_dnoise_1 = mdot(self.V_star * LBiLmipsi1.T, LBiLmipsi1 * dbstar_dnoise.T * self.likelihood.Y.T) + alpha = mdot(LBiLmipsi1, self.V_star) + alpha_ = mdot(LBiLmipsi1.T, alpha) + dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_ ** 2 * dbstar_dnoise) + + dD_dnoise_1 = mdot(self.V_star.T, self.psi1.T, self.Lmi.T, self.LBi.T, self.LBi, self.Lmi, self.psi1, dbstar_dnoise * self.likelihood.Y) + dD_dnoise_2 = 0.5 * mdot(self.V_star.T, self.psi1.T, Hi, self.psi1, dbstar_dnoise * self.psi1.T, Hi, self.psi1, self.V_star) + dD_dnoise = dD_dnoise_1 + dD_dnoise_2 + + self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise + + def log_likelihood(self): + """ Compute the (lower bound on the) log marginal likelihood """ + A = -0.5 * self.N * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) + C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) + D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) + return A + C + D + + def _log_likelihood_gradients(self): + pass + return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood))) + + def dL_dtheta(self): + if self.has_uncertain_inputs: + raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" + else: + dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0, self.X) + dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1, self.X, self.Z) + dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm, X=self.Z) + dL_dtheta += self._dKmm_dtheta + dL_dtheta += self._dpsi1_dtheta + return dL_dtheta + + def dL_dZ(self): + if self.has_uncertain_inputs: + raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" + else: + dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T, self.Z, self.X) + dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm, X=self.Z) + dL_dZ += self._dpsi1_dX + dL_dZ += self._dKmm_dX + return dL_dZ + + def _raw_predict(self, Xnew, which_parts, full_cov=False): + + if self.likelihood.is_heteroscedastic: + Iplus_Dprod_i = 1. / (1. 
+ self.Diag0 * self.likelihood.precision.flatten()) + self.Diag = self.Diag0 * Iplus_Dprod_i + self.P = Iplus_Dprod_i[:, None] * self.psi1.T + self.RPT0 = np.dot(self.Lmi, self.psi1) + self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0, ((1. - Iplus_Dprod_i) / self.Diag0)[:, None] * self.RPT0.T)) + self.R, info = linalg.flapack.dtrtrs(self.L, self.Lmi, lower=1) + self.RPT = np.dot(self.R, self.P.T) + self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T, self.RPT) + self.w = self.Diag * self.likelihood.v_tilde + self.Gamma = np.dot(self.R.T, np.dot(self.RPT, self.likelihood.v_tilde)) + self.mu = self.w + np.dot(self.P, self.Gamma) + + """ + Make a prediction for the generalized FITC model + + Arguments + --------- + X : Input prediction data - Nx1 numpy array (floats) + """ + # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) + + # Ci = I + (RPT0)Di(RPT0).T + # C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T + # = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T + # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T + # = I - V.T * V + U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) + V, info = linalg.flapack.dtrtrs(U, self.RPT0.T, lower=1) + C = np.eye(self.num_inducing) - np.dot(V.T, V) + mu_u = np.dot(C, self.RPT0) * (1. / self.Diag0[None, :]) + # self.C = C + # self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T + # self.mu_u = mu_u + # self.U = U + # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T) + mu_H = np.dot(mu_u, self.mu) + self.mu_H = mu_H + Sigma_H = C + np.dot(mu_u, np.dot(self.Sigma, mu_u.T)) + # q(f_star|y) = N(f_star|mu_star,sigma2_star) + Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) + KR0T = np.dot(Kx.T, self.Lmi.T) + mu_star = np.dot(KR0T, mu_H) + if full_cov: + Kxx = self.kern.K(Xnew, which_parts=which_parts) + var = Kxx + np.dot(KR0T, np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T)) + else: + Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) + var = (Kxx + np.sum(KR0T.T * np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T), 0))[:, None] + return mu_star[:, None], var + else: + raise NotImplementedError, "homoscedastic fitc not implemented" + """ + Kx = self.kern.K(self.Z, Xnew) + mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) + if full_cov: + Kxx = self.kern.K(Xnew) + var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting + else: + Kxx = self.kern.Kdiag(Xnew) + var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + return mu,var[:,None] + """ diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_fitc.py similarity index 79% rename from GPy/models/generalized_FITC.py rename to GPy/models/generalized_fitc.py index a0bb508c..a96609cc 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_fitc.py @@ -7,7 +7,11 @@ from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot from ..util.plot import gpplot from .. import kern from scipy import stats, linalg +<<<<<<< HEAD:GPy/models/generalized_FITC.py from sparse_GP import sparse_GP +======= +from ..core import SparseGP +>>>>>>> 7040b26f41f382edfdca3d3f7b689b9bbfc1a54f:GPy/models/generalized_fitc.py def backsub_both_sides(L,X): """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" @@ -15,7 +19,7 @@ def backsub_both_sides(L,X): return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T -class generalized_FITC(sparse_GP): +class GeneralizedFITC(SparseGP): """ Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. 
@@ -28,9 +32,9 @@ class generalized_FITC(sparse_GP): :param X_variance: The variance in the measurements of X (Gaussian variance) :type X_variance: np.ndarray (N x input_dim) | None :param Z: inducing inputs (optional, see note) - :type Z: np.ndarray (M x input_dim) | None - :param M : Number of inducing points (optional, default 10. Ignored if Z is not None) - :type M: int + :type Z: np.ndarray (num_inducing x input_dim) | None + :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None) + :type num_inducing: int :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :type normalize_(X|Y): bool """ @@ -38,13 +42,18 @@ class generalized_FITC(sparse_GP): def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): self.Z = Z - self.M = self.Z.shape[0] + self.num_inducing = self.Z.shape[0] self.true_precision = likelihood.precision +<<<<<<< HEAD:GPy/models/generalized_FITC.py sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False) +======= + super(GeneralizedFITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X) + self._set_params(self._get_params()) +>>>>>>> 7040b26f41f382edfdca3d3f7b689b9bbfc1a54f:GPy/models/generalized_fitc.py def _set_params(self, p): - self.Z = p[:self.M*self.input_dim].reshape(self.M, self.input_dim) + self.Z = p[:self.num_inducing*self.input_dim].reshape(self.num_inducing, self.input_dim) self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) self._compute_kernel_matrices() @@ -58,7 +67,7 @@ class generalized_FITC(sparse_GP): For a Gaussian (or direct: TODO) likelihood, no iteration is required: this function does nothing - Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP. + Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in SparseGP. The true precison is now 'true_precision' not 'precision'. """ if self.has_uncertain_inputs: @@ -75,14 +84,14 @@ class generalized_FITC(sparse_GP): but adds a diagonal term to the covariance matrix: diag(Knn - Qnn). This function: - computes the FITC diagonal term - - removes the extra terms computed in the sparse_GP approximation + - removes the extra terms computed in the SparseGP approximation - computes the likelihood gradients wrt the true precision. """ #NOTE the true precison is now 'true_precision' not 'precision' if self.likelihood.is_heteroscedastic: # Compute generalized FITC's diagonal term of the covariance - self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1) + self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1) Lmipsi1 = np.dot(self.Lmi,self.psi1) self.Qnn = np.dot(Lmipsi1.T,Lmipsi1) #self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) @@ -94,7 +103,7 @@ class generalized_FITC(sparse_GP): self.P = Iplus_Dprod_i[:,None] * self.psi1.T self.RPT0 = np.dot(self.Lmi,self.psi1) - self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) + self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0,((1. 
- Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) self.RPT = np.dot(self.R,self.P.T) self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) @@ -122,7 +131,7 @@ class generalized_FITC(sparse_GP): sf2 = sf**2 # Remove extra term from dL_dKmm - self.dL_dKmm += 0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB + self.dL_dKmm += 0.5 * self.input_dim * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB self.dL_dpsi0 = None #the partial derivative vector for the likelihood @@ -133,8 +142,8 @@ class generalized_FITC(sparse_GP): else: raise NotImplementedError, "homoscedastic derivatives not implemented" #likelihood is not heterscedatic - #self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 - #self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision + #self.partial_for_likelihood = - 0.5 * self.N*self.input_dim*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 + #self.partial_for_likelihood += 0.5 * self.input_dim * trace_dot(self.Bi,self.A)*self.likelihood.precision #self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) #TODO partial derivative vector for the likelihood not implemented @@ -146,7 +155,7 @@ class generalized_FITC(sparse_GP): if self.has_uncertain_inputs: raise NotImplementedError, "heteroscedatic derivates not implemented" else: - #NOTE in sparse_GP this would include the gradient wrt psi0 + #NOTE in SparseGP this would include the gradient wrt psi0 dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) return dL_dtheta @@ -155,11 +164,11 @@ class generalized_FITC(sparse_GP): """ Compute the (lower bound on the) log marginal likelihood """ sf2 = self.scale_factor**2 if self.likelihood.is_heteroscedastic: - A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) + A = -0.5*self.N*self.input_dim*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) else: - A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT - C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2)) - #C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) + A = -0.5*self.N*self.input_dim*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT + C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.num_inducing*np.log(sf2)) + #C = -0.5*self.input_dim * (self.B_logdet + self.num_inducing*np.log(sf2)) D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V)) #self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) #D_ = 0.5*np.trace(self.Cpsi1VVpsi1) @@ -177,13 +186,13 @@ class generalized_FITC(sparse_GP): # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) # Ci = I + (RPT0)Di(RPT0).T - # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T - # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T + # C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T + # = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T # = I - V.T * V U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) - C = np.eye(self.M) - np.dot(V.T,V) + C = np.eye(self.num_inducing) - 
np.dot(V.T,V) mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) #self.C = C #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T @@ -199,12 +208,12 @@ class generalized_FITC(sparse_GP): mu_star = np.dot(KR0T,mu_H) if full_cov: Kxx = self.kern.K(Xnew,which_parts=which_parts) - var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) + var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T)) else: Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed? - var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed? - var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None] + var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T)) # TODO: RA, is this line needed? + var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T),0))[:,None] return mu_star[:,None],var else: raise NotImplementedError, "homoscedastic fitc not implemented" diff --git a/GPy/models/GP_classification.py b/GPy/models/gp_classification.py similarity index 91% rename from GPy/models/GP_classification.py rename to GPy/models/gp_classification.py index 2b47aa08..74455a2b 100644 --- a/GPy/models/GP_classification.py +++ b/GPy/models/gp_classification.py @@ -15,7 +15,7 @@ class GP_classification(GP): :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to binomial with probit link_function + :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function :param kernel: a GPy kernel, defaults to rbf :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class GP_classification(GP): kernel = kern.rbf(X.shape[1]) if likelihood is None: - distribution = likelihoods.likelihood_functions.binomial() + distribution = likelihoods.likelihood_functions.Binomial() likelihood = likelihoods.EP(Y, distribution) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): diff --git a/GPy/models/GP_regression.py b/GPy/models/gp_regression.py similarity index 97% rename from GPy/models/GP_regression.py rename to GPy/models/gp_regression.py index b892eda8..8d0b02e0 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/gp_regression.py @@ -7,7 +7,7 @@ from ..core import GP from .. import likelihoods from .. 
import kern -class GP_regression(GP): +class GPRegression(GP): """ Gaussian Process model for regression diff --git a/GPy/models/GPLVM.py b/GPy/models/gplvm.py similarity index 100% rename from GPy/models/GPLVM.py rename to GPy/models/gplvm.py diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index e91298aa..4300f113 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -4,14 +4,13 @@ Created on 10 Apr 2013 @author: Max Zwiessele ''' from GPy.core import model -from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM -from GPy.core import sparse_GP +from GPy.core import SparseGP from GPy.util.linalg import PCA -from scipy import linalg import numpy import itertools import pylab from GPy.kern.kern import kern +from GPy.models.bayesian_gplvm import BayesianGPLVM class MRD(model): """ @@ -38,7 +37,7 @@ class MRD(model): *concat: PCA on concatenated outputs *single: PCA on each output *random: random - :param M: + :param num_inducing: number of inducing inputs to use :param Z: initial inducing inputs @@ -62,22 +61,22 @@ class MRD(model): assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.input_dim = input_dim - self.M = M + self.num_inducing = M self._debug = _debug self._init = True X = self._init_X(initx, likelihood_or_Y_list) Z = self._init_Z(initz, X) - self.bgplvms = [Bayesian_GPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] + self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] del self._init self.gref = self.bgplvms[0] - nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms]) + nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms]) self.nparams = nparams.cumsum() self.N = self.gref.N self.NQ = self.N * self.input_dim - self.MQ = self.M * self.input_dim + self.MQ = self.num_inducing * self.input_dim model.__init__(self) # @UndefinedVariable self._set_params(self._get_params()) @@ -151,7 +150,7 @@ class MRD(model): itertools.izip(ns, itertools.repeat(name))) return list(itertools.chain(n1var, *(map_names(\ - sparse_GP._get_param_names(g)[self.MQ:], n) \ + SparseGP._get_param_names(g)[self.MQ:], n) \ for g, n in zip(self.bgplvms, self.names)))) def _get_params(self): @@ -165,14 +164,14 @@ class MRD(model): X = self.gref.X.ravel() X_var = self.gref.X_variance.ravel() Z = self.gref.Z.ravel() - thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms] + thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms] params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)]) return params # def _set_var_params(self, g, X, X_var, Z): # g.X = X.reshape(self.N, self.input_dim) # g.X_variance = X_var.reshape(self.N, self.input_dim) -# g.Z = Z.reshape(self.M, self.input_dim) +# g.Z = Z.reshape(self.num_inducing, self.input_dim) # # def _set_kern_params(self, g, p): # g.kern._set_params(p[:g.kern.Nparam]) @@ -206,7 +205,7 @@ class MRD(model): def log_likelihood(self): ll = -self.gref.KL_divergence() for g in self.bgplvms: - ll += sparse_GP.log_likelihood(g) + ll += SparseGP.log_likelihood(g) return ll def _log_likelihood_gradients(self): @@ -215,7 +214,7 @@ class MRD(model): dLdmu -= dKLmu dLdS -= dKLdS dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten() - dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms)) + dldzt1 = reduce(lambda a, b: a + b, 
(SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms)) return numpy.hstack((dLdmuS, dldzt1, @@ -250,9 +249,9 @@ class MRD(model): if X is None: X = self.X if init in "permute": - Z = numpy.random.permutation(X.copy())[:self.M] + Z = numpy.random.permutation(X.copy())[:self.num_inducing] elif init in "random": - Z = numpy.random.randn(self.M, self.input_dim) * X.var() + Z = numpy.random.randn(self.num_inducing, self.input_dim) * X.var() self.Z = Z return Z @@ -274,8 +273,8 @@ class MRD(model): else: return pylab.gcf() - def plot_X_1d(self): - return self.gref.plot_X_1d() + def plot_X_1d(self, *a, **kw): + return self.gref.plot_X_1d(*a, **kw) def plot_X(self, fignum=None, ax=None): fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py deleted file mode 100644 index ce71cd9b..00000000 --- a/GPy/models/sparse_GPLVM.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import pylab as pb -import sys, pdb -# from .. import kern -# from ..core import model -# from ..util.linalg import pdinv, PCA -from GPLVM import GPLVM -from sparse_GP_regression import sparse_GP_regression - -class sparse_GPLVM(sparse_GP_regression, GPLVM): - """ - Sparse Gaussian Process Latent Variable Model - - :param Y: observed data - :type Y: np.ndarray - :param input_dim: latent dimensionality - :type input_dim: int - :param init: initialisation method for the latent space - :type init: 'PCA'|'random' - - """ - def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10): - X = self.initialise_latent(init, input_dim, Y) - sparse_GP_regression.__init__(self, X, Y, kernel=kernel,M=M) - - def _get_param_names(self): - return (sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) - + sparse_GP_regression._get_param_names(self)) - - def _get_params(self): - return np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self))) - - def _set_params(self,x): - self.X = x[:self.X.size].reshape(self.N,self.input_dim).copy() - sparse_GP_regression._set_params(self, x[self.X.size:]) - - def log_likelihood(self): - return sparse_GP_regression.log_likelihood(self) - - def dL_dX(self): - dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X) - dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z) - - return dL_dX - - def _log_likelihood_gradients(self): - return np.hstack((self.dL_dX().flatten(), sparse_GP_regression._log_likelihood_gradients(self))) - - def plot(self): - GPLVM.plot(self) - #passing Z without a small amout of jitter will induce the white kernel where we don;t want it! 
- mu, var, upper, lower = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001) - pb.plot(mu[:, 0] , mu[:, 1], 'ko') - - def plot_latent(self, *args, **kwargs): - input_1, input_2 = GPLVM.plot_latent(*args, **kwargs) - pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w') diff --git a/GPy/models/sparse_GP_classification.py b/GPy/models/sparse_gp_classification.py similarity index 92% rename from GPy/models/sparse_GP_classification.py rename to GPy/models/sparse_gp_classification.py index 752265fe..ee244aa5 100644 --- a/GPy/models/sparse_GP_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -16,7 +16,7 @@ class sparse_GP_classification(sparse_GP): :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to binomial with probit link_function + :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function :param kernel: a GPy kernel, defaults to rbf+white :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class sparse_GP_classification(sparse_GP): kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) if likelihood is None: - distribution = likelihoods.likelihood_functions.binomial() + distribution = likelihoods.likelihood_functions.Binomial() likelihood = likelihoods.EP(Y, distribution) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_gp_regression.py similarity index 65% rename from GPy/models/sparse_GP_regression.py rename to GPy/models/sparse_gp_regression.py index 78089ded..d7907c5f 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -3,17 +3,15 @@ import numpy as np -from ..core import sparse_GP +from ..core import SparseGP from .. import likelihoods from .. 
import kern
-from ..likelihoods import likelihood
-from GP_regression import GP_regression
 
-class sparse_GP_regression(sparse_GP):
+class SparseGPRegression(SparseGP):
     """
     Gaussian Process model for regression
 
-    This is a thin wrapper around the sparse_GP class, with a set of sensible defalts
+    This is a thin wrapper around the SparseGP class, with a set of sensible defaults
 
     :param X: input observations
     :param Y: observed values
@@ -29,19 +27,19 @@
     """
     def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None):
-        #kern defaults to rbf (plus white for stability)
+        # kern defaults to rbf (plus white for stability)
         if kernel is None:
-            kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
+            kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3)
 
-        #Z defaults to a subset of the data
+        # Z defaults to a subset of the data
         if Z is None:
             i = np.random.permutation(X.shape[0])[:M]
             Z = X[i].copy()
         else:
-            assert Z.shape[1]==X.shape[1]
+            assert Z.shape[1] == X.shape[1]
 
-        #likelihood defaults to Gaussian
-        likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
+        # likelihood defaults to Gaussian
+        likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y)
 
-        sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
+        SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance)
         self._set_params(self._get_params())
diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py
new file mode 100644
index 00000000..bec8c2da
--- /dev/null
+++ b/GPy/models/sparse_gplvm.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+import pylab as pb
+import sys, pdb
+from GPy.models.sparse_gp_regression import SparseGPRegression
+from GPy.models.gplvm import GPLVM
+# from .. import kern
+# from ..core import model
+# from ..util.linalg import pdinv, PCA
+
+class SparseGPLVM(SparseGPRegression, GPLVM):
+    """
+    Sparse Gaussian Process Latent Variable Model
+
+    :param Y: observed data
+    :type Y: np.ndarray
+    :param input_dim: latent dimensionality
+    :type input_dim: int
+    :param init: initialisation method for the latent space
+    :type init: 'PCA'|'random'
+
+    """
+    def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10):
+        X = self.initialise_latent(init, input_dim, Y)
+        SparseGPRegression.__init__(self, X, Y, kernel=kernel, M=M)
+
+    def _get_param_names(self):
+        return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], [])
+                + SparseGPRegression._get_param_names(self))
+
+    def _get_params(self):
+        return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self)))
+
+    def _set_params(self, x):
+        self.X = x[:self.X.size].reshape(self.N, self.input_dim).copy()
+        SparseGPRegression._set_params(self, x[self.X.size:])
+
+    def log_likelihood(self):
+        return SparseGPRegression.log_likelihood(self)
+
+    def dL_dX(self):
+        dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
+        dL_dX += self.kern.dK_dX(self.dL_dpsi1.T, self.X, self.Z)
+
+        return dL_dX
+
+    def _log_likelihood_gradients(self):
+        return np.hstack((self.dL_dX().flatten(), SparseGPRegression._log_likelihood_gradients(self)))
+
+    def plot(self):
+        GPLVM.plot(self)
+        # passing Z without a small amount of jitter will induce the white kernel where we don't want it!
+ mu, var, upper, lower = SparseGPRegression.predict(self, self.Z + np.random.randn(*self.Z.shape) * 0.0001) + pb.plot(mu[:, 0] , mu[:, 1], 'ko') + + def plot_latent(self, *args, **kwargs): + input_1, input_2 = GPLVM.plot_latent(*args, **kwargs) + pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w') diff --git a/GPy/models/warped_GP.py b/GPy/models/warped_gp.py similarity index 80% rename from GPy/models/warped_GP.py rename to GPy/models/warped_gp.py index 86a69226..fcef66c6 100644 --- a/GPy/models/warped_GP.py +++ b/GPy/models/warped_gp.py @@ -3,25 +3,21 @@ import numpy as np -from .. import kern -from ..core import model -from ..util.linalg import pdinv -from ..util.plot import gpplot from ..util.warping_functions import * -from GP_regression import GP_regression from ..core import GP from .. import likelihoods -from .. import kern +from GPy.util.warping_functions import TanhWarpingFunction_d +from GPy import kern -class warpedGP(GP): - def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False): +class WarpedGP(GP): + def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False): if kernel is None: kernel = kern.rbf(X.shape[1]) if warping_function == None: self.warping_function = TanhWarpingFunction_d(warping_terms) - self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1) + self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1) Y = self._scale_data(Y) self.has_uncertain_inputs = False @@ -35,10 +31,10 @@ class warpedGP(GP): def _scale_data(self, Y): self._Ymax = Y.max() self._Ymin = Y.min() - return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5 + return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5 def _unscale_data(self, Y): - return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin + return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin def _set_params(self, x): self.warping_params = x[:self.warping_function.num_parameters] @@ -68,15 +64,15 @@ class warpedGP(GP): alpha = np.dot(self.Ki, self.likelihood.Y.flatten()) warping_grads = self.warping_function_gradients(alpha) - warping_grads = np.append(warping_grads[:,:-1].flatten(), warping_grads[0,-1]) + warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1]) return np.hstack((warping_grads.flatten(), ll_grads.flatten())) def warping_function_gradients(self, Kiy): grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params) grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params, - return_covar_chain = True) - djac_dpsi = ((1.0/grad_y[:,:, None, None])*grad_y_psi).sum(axis=0).sum(axis=0) - dquad_dpsi = (Kiy[:,None,None,None] * grad_psi).sum(axis=0).sum(axis=0) + return_covar_chain=True) + djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0) + dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0) return -dquad_dpsi + djac_dpsi diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py index ae72983a..3ca32660 100644 --- a/GPy/testing/bgplvm_tests.py +++ b/GPy/testing/bgplvm_tests.py @@ -4,6 +4,7 @@ import unittest import numpy as np import GPy +from GPy.models.bayesian_gplvm import BayesianGPLVM class BGPLVMTests(unittest.TestCase): def test_bias_kern(self): @@ -11,10 +12,10 @@ class BGPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) 
- Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -24,10 +25,10 @@ class BGPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -37,10 +38,10 @@ class BGPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -50,10 +51,10 @@ class BGPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -64,10 +65,10 @@ class BGPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py index a06f1090..51131fc3 100644 --- a/GPy/testing/examples_tests.py +++ b/GPy/testing/examples_tests.py @@ -9,6 +9,7 @@ import pkgutil import os import random from nose.tools import nottest +import sys class ExamplesTests(unittest.TestCase): def _checkgrad(self, model): @@ -40,9 +41,9 @@ def model_instance(model): @nottest def test_models(): examples_path = os.path.dirname(GPy.examples.__file__) - #Load modules + # Load modules for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]): - #Load examples + # Load examples module_examples = loader.find_module(module_name).load_module(module_name) print "MODULE", module_examples print "Before" @@ -56,11 +57,11 @@ def test_models(): continue print "Testing example: ", 
example[0] - #Generate model + # Generate model model = example[1]() print model - #Create tests for instance check + # Create tests for instance check """ test = model_instance_generator(model) test.__name__ = 'test_instance_%s' % example[0] @@ -78,4 +79,5 @@ def test_models(): if __name__ == "__main__": print "Running unit tests, please be (very) patient..." - unittest.main() + # unittest.main() + test_models() diff --git a/GPy/testing/gplvm_tests.py b/GPy/testing/gplvm_tests.py index b1721a3c..4194698f 100644 --- a/GPy/testing/gplvm_tests.py +++ b/GPy/testing/gplvm_tests.py @@ -11,7 +11,7 @@ class GPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() @@ -23,7 +23,7 @@ class GPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() @@ -35,7 +35,7 @@ class GPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index b48bc813..2d97c82c 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -12,7 +12,7 @@ class KernelTests(unittest.TestCase): K.constrain_fixed('2') X = np.random.rand(5,5) Y = np.ones((5,1)) - m = GPy.models.GP_regression(X,Y,K) + m = GPy.models.GPRegression(X,Y,K) self.assertTrue(m.checkgrad()) def test_fixedkernel(self): @@ -23,7 +23,7 @@ class KernelTests(unittest.TestCase): K = np.dot(X, X.T) kernel = GPy.kern.fixed(4, K) Y = np.ones((30,1)) - m = GPy.models.GP_regression(X,Y,kernel=kernel) + m = GPy.models.GPRegression(X,Y,kernel=kernel) self.assertTrue(m.checkgrad()) def test_coregionalisation(self): @@ -36,9 +36,9 @@ class KernelTests(unittest.TestCase): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) - k2 = GPy.kern.coregionalise(2,1) + k2 = GPy.kern.Coregionalise(2,1) k = k1.prod(k2,tensor=True) - m = GPy.models.GP_regression(X,Y,kernel=k) + m = GPy.models.GPRegression(X,Y,kernel=k) self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/mrd_tests.py b/GPy/testing/mrd_tests.py index 25adbca6..df4c2723 100644 --- a/GPy/testing/mrd_tests.py +++ b/GPy/testing/mrd_tests.py @@ -20,7 +20,7 @@ class MRDTests(unittest.TestCase): k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) K = k.K(X) - Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)] + Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)] likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist] m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, M=M) diff --git 
a/GPy/testing/prior_tests.py b/GPy/testing/prior_tests.py index d3269560..e0226751 100644 --- a/GPy/testing/prior_tests.py +++ b/GPy/testing/prior_tests.py @@ -13,7 +13,7 @@ class PriorTests(unittest.TestCase): y = b*X + C + 1*np.sin(X) y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] - m = GPy.models.GP_regression(X, y) + m = GPy.models.GPRegression(X, y) m.ensure_default_constraints() lognormal = GPy.priors.LogGaussian(1, 2) m.set_prior('rbf', lognormal) @@ -27,7 +27,7 @@ class PriorTests(unittest.TestCase): y = b*X + C + 1*np.sin(X) y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] - m = GPy.models.GP_regression(X, y) + m = GPy.models.GPRegression(X, y) m.ensure_default_constraints() Gamma = GPy.priors.Gamma(1, 1) m.set_prior('rbf', Gamma) @@ -41,7 +41,7 @@ class PriorTests(unittest.TestCase): y = b*X + C + 1*np.sin(X) y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] - m = GPy.models.GP_regression(X, y) + m = GPy.models.GPRegression(X, y) m.ensure_default_constraints() gaussian = GPy.priors.Gaussian(1, 1) success = False diff --git a/GPy/testing/psi_stat_expactation_tests.py b/GPy/testing/psi_stat_expactation_tests.py index 95f83fb5..da71754b 100644 --- a/GPy/testing/psi_stat_expactation_tests.py +++ b/GPy/testing/psi_stat_expactation_tests.py @@ -21,36 +21,36 @@ def ard(p): @testing.deepTest(__test__) class Test(unittest.TestCase): - D = 9 - M = 4 + input_dim = 9 + num_inducing = 4 N = 3 Nsamples = 6e6 def setUp(self): self.kerns = ( -# (GPy.kern.rbf(self.D, ARD=True) + -# GPy.kern.linear(self.D, ARD=True) + -# GPy.kern.bias(self.D) + -# GPy.kern.white(self.D)), - (GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) + - GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) + - GPy.kern.linear(self.D, np.random.rand(self.D), ARD=True) + - GPy.kern.bias(self.D) + - GPy.kern.white(self.D)), -# GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True), -# GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True), -# GPy.kern.linear(self.D) + GPy.kern.bias(self.D), -# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D), -# GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), -# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), -# GPy.kern.bias(self.D), GPy.kern.white(self.D), +# (GPy.kern.rbf(self.input_dim, ARD=True) + +# GPy.kern.linear(self.input_dim, ARD=True) + +# GPy.kern.bias(self.input_dim) + +# GPy.kern.white(self.input_dim)), + (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + + GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + + GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + + GPy.kern.bias(self.input_dim) + + GPy.kern.white(self.input_dim)), +# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), +# GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True), +# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim), +# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim), +# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), +# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), +# GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim), ) - self.q_x_mean = np.random.randn(self.D) - self.q_x_variance = np.exp(np.random.randn(self.D)) - self.q_x_samples = np.random.randn(self.Nsamples, 
self.D) * np.sqrt(self.q_x_variance) + self.q_x_mean - self.Z = np.random.randn(self.M, self.D) - self.q_x_mean.shape = (1, self.D) - self.q_x_variance.shape = (1, self.D) + self.q_x_mean = np.random.randn(self.input_dim) + self.q_x_variance = np.exp(np.random.randn(self.input_dim)) + self.q_x_samples = np.random.randn(self.Nsamples, self.input_dim) * np.sqrt(self.q_x_variance) + self.q_x_mean + self.Z = np.random.randn(self.num_inducing, self.input_dim) + self.q_x_mean.shape = (1, self.input_dim) + self.q_x_variance.shape = (1, self.input_dim) def test_psi0(self): for kern in self.kerns: @@ -63,7 +63,7 @@ class Test(unittest.TestCase): for kern in self.kerns: Nsamples = 100 psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance) - K_ = np.zeros((Nsamples, self.M)) + K_ = np.zeros((Nsamples, self.num_inducing)) diffs = [] for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)): K = kern.K(q_x_sample_stripe, self.Z) @@ -89,7 +89,7 @@ class Test(unittest.TestCase): for kern in self.kerns: Nsamples = 100 psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) - K_ = np.zeros((self.M, self.M)) + K_ = np.zeros((self.num_inducing, self.num_inducing)) diffs = [] for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)): K = kern.K(q_x_sample_stripe, self.Z) diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/testing/psi_stat_gradient_tests.py index 23f841b5..2257d16c 100644 --- a/GPy/testing/psi_stat_gradient_tests.py +++ b/GPy/testing/psi_stat_gradient_tests.py @@ -17,14 +17,14 @@ class PsiStatModel(model): self.X_variance = X_variance self.Z = Z self.N, self.input_dim = X.shape - self.M, input_dim = Z.shape + self.num_inducing, input_dim = Z.shape assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape) self.kern = kernel super(PsiStatModel, self).__init__() self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance) def _get_param_names(self): Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))] - Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.input_dim))] + Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))] return Xnames + Znames + self.kern._get_param_names() def _get_params(self): return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()]) @@ -34,7 +34,7 @@ class PsiStatModel(model): start, end = end, end + self.X_variance.size self.X_variance = x[start: end].reshape(self.N, self.input_dim) start, end = end, end + self.Z.size - self.Z = x[start: end].reshape(self.M, self.input_dim) + self.Z = x[start: end].reshape(self.num_inducing, self.input_dim) self.kern._set_params(x[end:]) def log_likelihood(self): return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum() @@ -43,19 +43,19 @@ class PsiStatModel(model): try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) except AttributeError: - psiZ = numpy.zeros(self.M * self.input_dim) + psiZ = numpy.zeros(self.num_inducing * self.input_dim) thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten() return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad)) class 
DPsiStatTest(unittest.TestCase): input_dim = 5 N = 50 - M = 10 - D = 20 + num_inducing = 10 + input_dim = 20 X = numpy.random.randn(N, input_dim) X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) - Z = numpy.random.permutation(X)[:M] - Y = X.dot(numpy.random.randn(input_dim, D)) + Z = numpy.random.permutation(X)[:num_inducing] + Y = X.dot(numpy.random.randn(input_dim, input_dim)) # kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)] kernels = [GPy.kern.linear(input_dim), GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim), @@ -65,7 +65,7 @@ class DPsiStatTest(unittest.TestCase): def testPsi0(self): for k in self.kernels: m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + M=self.num_inducing, kernel=k) try: assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) except: @@ -80,27 +80,27 @@ class DPsiStatTest(unittest.TestCase): def testPsi2_lin(self): k = self.kernels[0] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + M=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_lin_bia(self): k = self.kernels[3] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + M=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_rbf(self): k = self.kernels[1] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + M=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_rbf_bia(self): k = self.kernels[-1] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + M=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_bia(self): k = self.kernels[2] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + M=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) @@ -108,14 +108,14 @@ if __name__ == "__main__": import sys interactive = 'i' in sys.argv if interactive: -# N, M, input_dim, D = 30, 5, 4, 30 +# N, num_inducing, input_dim, input_dim = 30, 5, 4, 30 # X = numpy.random.rand(N, input_dim) # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # K = k.K(X) -# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T +# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, input_dim).T # Y -= Y.mean(axis=0) # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) -# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, M=M) +# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) # m.ensure_default_constraints() # m.randomize() # # self.assertTrue(m.checkgrad()) @@ -136,22 +136,22 @@ if __name__ == "__main__": # for k in kernels: # m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=k) +# num_inducing=num_inducing, kernel=k) # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) # # m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.linear(input_dim)) +# num_inducing=num_inducing, 
kernel=GPy.kern.linear(input_dim)) # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=kernel) +# num_inducing=num_inducing, kernel=kernel) # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=kernel) +# num_inducing=num_inducing, kernel=kernel) # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.rbf(input_dim)) +# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)) m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, M=M, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) m3.ensure_default_constraints() # + GPy.kern.bias(input_dim)) # m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)) +# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)) else: unittest.main() diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py index a790ce54..38f95730 100644 --- a/GPy/testing/sparse_gplvm_tests.py +++ b/GPy/testing/sparse_gplvm_tests.py @@ -4,6 +4,7 @@ import unittest import numpy as np import GPy +from GPy.models.sparse_gplvm import SparseGPLVM class sparse_GPLVMTests(unittest.TestCase): def test_bias_kern(self): @@ -11,9 +12,9 @@ class sparse_GPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -24,9 +25,9 @@ class sparse_GPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @@ -36,9 +37,9 @@ class sparse_GPLVMTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, M=M) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index bb92fe27..020f3890 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -5,33 +5,33 @@ import unittest import numpy as np import GPy +from GPy.likelihoods.likelihood_functions import Binomial class GradientTests(unittest.TestCase): def setUp(self): ###################################### - ## 1 dimensional example + # # 1 dimensional example # sample inputs and outputs - self.X1D = np.random.uniform(-3.,3.,(20,1)) - self.Y1D = np.sin(self.X1D)+np.random.randn(20,1)*0.05 + self.X1D = np.random.uniform(-3., 3., (20, 1)) + self.Y1D = np.sin(self.X1D) + np.random.randn(20, 1) * 0.05 
###################################### - ## 2 dimensional example + # # 2 dimensional example # sample inputs and outputs - self.X2D = np.random.uniform(-3.,3.,(40,2)) - self.Y2D = np.sin(self.X2D[:,0:1]) * np.sin(self.X2D[:,1:2])+np.random.randn(40,1)*0.05 + self.X2D = np.random.uniform(-3., 3., (40, 2)) + self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + np.random.randn(40, 1) * 0.05 - def check_model_with_white(self, kern, model_type='GP_regression', dimension=1): - #Get the correct gradients + def check_model_with_white(self, kern, model_type='GPRegression', dimension=1): + # Get the correct gradients if dimension == 1: X = self.X1D Y = self.Y1D else: X = self.X2D Y = self.Y2D - - #Get model type (GP_regression, GP_sparse_regression, etc) + # Get model type (GPRegression, SparseGPRegression, etc) model_fit = getattr(GPy.models, model_type) noise = GPy.kern.white(dimension) @@ -42,114 +42,114 @@ class GradientTests(unittest.TestCase): # contrain all parameters to be positive self.assertTrue(m.checkgrad()) - def test_gp_regression_rbf_1d(self): + def test_GPRegression_rbf_1d(self): ''' Testing the GP regression with rbf kernel with white kernel on 1d data ''' rbf = GPy.kern.rbf(1) - self.check_model_with_white(rbf, model_type='GP_regression', dimension=1) + self.check_model_with_white(rbf, model_type='GPRegression', dimension=1) - def test_GP_regression_rbf_2D(self): + def test_GPRegression_rbf_2D(self): ''' Testing the GP regression with rbf and white kernel on 2d data ''' rbf = GPy.kern.rbf(2) - self.check_model_with_white(rbf, model_type='GP_regression', dimension=2) + self.check_model_with_white(rbf, model_type='GPRegression', dimension=2) - def test_GP_regression_rbf_ARD_2D(self): + def test_GPRegression_rbf_ARD_2D(self): ''' Testing the GP regression with rbf and white kernel on 2d data ''' - k = GPy.kern.rbf(2,ARD=True) - self.check_model_with_white(k, model_type='GP_regression', dimension=2) + k = GPy.kern.rbf(2, ARD=True) + self.check_model_with_white(k, model_type='GPRegression', dimension=2) - def test_GP_regression_matern52_1D(self): + def test_GPRegression_matern52_1D(self): ''' Testing the GP regression with matern52 kernel on 1d data ''' matern52 = GPy.kern.Matern52(1) - self.check_model_with_white(matern52, model_type='GP_regression', dimension=1) + self.check_model_with_white(matern52, model_type='GPRegression', dimension=1) - def test_GP_regression_matern52_2D(self): + def test_GPRegression_matern52_2D(self): ''' Testing the GP regression with matern52 kernel on 2d data ''' matern52 = GPy.kern.Matern52(2) - self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) + self.check_model_with_white(matern52, model_type='GPRegression', dimension=2) - def test_GP_regression_matern52_ARD_2D(self): + def test_GPRegression_matern52_ARD_2D(self): ''' Testing the GP regression with matern52 kernel on 2d data ''' - matern52 = GPy.kern.Matern52(2,ARD=True) - self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) + matern52 = GPy.kern.Matern52(2, ARD=True) + self.check_model_with_white(matern52, model_type='GPRegression', dimension=2) - def test_GP_regression_matern32_1D(self): + def test_GPRegression_matern32_1D(self): ''' Testing the GP regression with matern32 kernel on 1d data ''' matern32 = GPy.kern.Matern32(1) - self.check_model_with_white(matern32, model_type='GP_regression', dimension=1) + self.check_model_with_white(matern32, model_type='GPRegression', dimension=1) - def test_GP_regression_matern32_2D(self): + def 
test_GPRegression_matern32_2D(self): ''' Testing the GP regression with matern32 kernel on 2d data ''' matern32 = GPy.kern.Matern32(2) - self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) + self.check_model_with_white(matern32, model_type='GPRegression', dimension=2) - def test_GP_regression_matern32_ARD_2D(self): + def test_GPRegression_matern32_ARD_2D(self): ''' Testing the GP regression with matern32 kernel on 2d data ''' - matern32 = GPy.kern.Matern32(2,ARD=True) - self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) + matern32 = GPy.kern.Matern32(2, ARD=True) + self.check_model_with_white(matern32, model_type='GPRegression', dimension=2) - def test_GP_regression_exponential_1D(self): + def test_GPRegression_exponential_1D(self): ''' Testing the GP regression with exponential kernel on 1d data ''' exponential = GPy.kern.exponential(1) - self.check_model_with_white(exponential, model_type='GP_regression', dimension=1) + self.check_model_with_white(exponential, model_type='GPRegression', dimension=1) - def test_GP_regression_exponential_2D(self): + def test_GPRegression_exponential_2D(self): ''' Testing the GP regression with exponential kernel on 2d data ''' exponential = GPy.kern.exponential(2) - self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) + self.check_model_with_white(exponential, model_type='GPRegression', dimension=2) - def test_GP_regression_exponential_ARD_2D(self): + def test_GPRegression_exponential_ARD_2D(self): ''' Testing the GP regression with exponential kernel on 2d data ''' - exponential = GPy.kern.exponential(2,ARD=True) - self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) + exponential = GPy.kern.exponential(2, ARD=True) + self.check_model_with_white(exponential, model_type='GPRegression', dimension=2) - def test_GP_regression_bias_kern_1D(self): + def test_GPRegression_bias_kern_1D(self): ''' Testing the GP regression with bias kernel on 1d data ''' bias = GPy.kern.bias(1) - self.check_model_with_white(bias, model_type='GP_regression', dimension=1) + self.check_model_with_white(bias, model_type='GPRegression', dimension=1) - def test_GP_regression_bias_kern_2D(self): + def test_GPRegression_bias_kern_2D(self): ''' Testing the GP regression with bias kernel on 2d data ''' bias = GPy.kern.bias(2) - self.check_model_with_white(bias, model_type='GP_regression', dimension=2) + self.check_model_with_white(bias, model_type='GPRegression', dimension=2) - def test_GP_regression_linear_kern_1D_ARD(self): + def test_GPRegression_linear_kern_1D_ARD(self): ''' Testing the GP regression with linear kernel on 1d data ''' - linear = GPy.kern.linear(1,ARD=True) - self.check_model_with_white(linear, model_type='GP_regression', dimension=1) + linear = GPy.kern.linear(1, ARD=True) + self.check_model_with_white(linear, model_type='GPRegression', dimension=1) - def test_GP_regression_linear_kern_2D_ARD(self): + def test_GPRegression_linear_kern_2D_ARD(self): ''' Testing the GP regression with linear kernel on 2d data ''' - linear = GPy.kern.linear(2,ARD=True) - self.check_model_with_white(linear, model_type='GP_regression', dimension=2) + linear = GPy.kern.linear(2, ARD=True) + self.check_model_with_white(linear, model_type='GPRegression', dimension=2) - def test_GP_regression_linear_kern_1D(self): + def test_GPRegression_linear_kern_1D(self): ''' Testing the GP regression with linear kernel on 1d data ''' linear = GPy.kern.linear(1) - 
self.check_model_with_white(linear, model_type='GP_regression', dimension=1) + self.check_model_with_white(linear, model_type='GPRegression', dimension=1) - def test_GP_regression_linear_kern_2D(self): + def test_GPRegression_linear_kern_2D(self): ''' Testing the GP regression with linear kernel on 2d data ''' linear = GPy.kern.linear(2) - self.check_model_with_white(linear, model_type='GP_regression', dimension=2) + self.check_model_with_white(linear, model_type='GPRegression', dimension=2) - def test_sparse_GP_regression_rbf_white_kern_1d(self): + def test_SparseGPRegression_rbf_white_kern_1d(self): ''' Testing the sparse GP regression with rbf kernel with white kernel on 1d data ''' rbf = GPy.kern.rbf(1) - self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=1) + self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=1) - def test_sparse_GP_regression_rbf_white_kern_2D(self): + def test_SparseGPRegression_rbf_white_kern_2D(self): ''' Testing the sparse GP regression with rbf and white kernel on 2d data ''' rbf = GPy.kern.rbf(2) - self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=2) + self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=2) def test_GPLVM_rbf_bias_white_kern_2D(self): """ Testing GPLVM with rbf + bias and white kernel """ N, input_dim, D = 50, 1, 2 X = np.random.rand(N, input_dim) - k = GPy.kern.rbf(input_dim, 0.5, 0.9*np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) + k = GPy.kern.rbf(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T - m = GPy.models.GPLVM(Y, input_dim, kernel = k) + Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T + m = GPy.models.GPLVM(Y, input_dim, kernel=k) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) @@ -159,42 +159,43 @@ class GradientTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T - m = GPy.models.GPLVM(Y, input_dim, init = 'PCA', kernel = k) + Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T + m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) def test_GP_EP_probit(self): N = 20 - X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] - Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] + X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] + Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] kernel = GPy.kern.rbf(1) - distribution = GPy.likelihoods.likelihood_functions.binomial() + distribution = GPy.likelihoods.likelihood_functions.Binomial() likelihood = GPy.likelihoods.EP(Y, distribution) m = GPy.core.GP(X, likelihood, kernel) m.ensure_default_constraints() m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) - #self.assertTrue(m.EPEM) + # self.assertTrue(m.EPEM) def test_sparse_EP_DTC_probit(self): N = 20 - X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] - Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] - Z = np.linspace(0,15,4)[:,None] + X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] + Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] + Z = np.linspace(0, 
15, 4)[:, None] kernel = GPy.kern.rbf(1) - distribution = GPy.likelihoods.likelihood_functions.binomial() + distribution = GPy.likelihoods.likelihood_functions.Binomial() likelihood = GPy.likelihoods.EP(Y, distribution) - m = GPy.core.sparse_GP(X, likelihood, kernel,Z) + m = GPy.core.SparseGP(X, likelihood, kernel, Z) m.ensure_default_constraints() m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) def test_generalized_FITC(self): N = 20 - X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] + X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None] k = GPy.kern.rbf(1) + GPy.kern.white(1) Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] + distribution = GPy.likelihoods.likelihood_functions.Binomial() likelihood = GPy.likelihoods.EP(Y, distribution) #likelihood = GPy.inference.likelihoods.binomial(Y) diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 36fa4f61..8f22a7d0 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -13,7 +13,7 @@ default_seed = 10000 # Some general utilities. def sample_class(f): p = 1. / (1. + np.exp(-f)) c = np.random.binomial(1, p) c = np.where(c, 1, -1) return c diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 6e7d26d8..c04fc460 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -1,87 +1,80 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -#tdot function courtesy of Ian Murray: +# tdot function courtesy of Ian Murray: # Iain Murray, April 2013. iain contactable via iainmurray.net # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py import numpy as np -from scipy import linalg, optimize, weave -import pylab as pb -import Tango -import sys -import re -import pdb -import cPickle +from scipy import linalg, weave import types import ctypes from ctypes import byref, c_char, c_int, c_double # TODO -#import scipy.lib.lapack -import scipy as sp +# import scipy.lib.lapack try: - _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) + _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) # @UndefinedVariable _blas_available = True except: _blas_available = False -def trace_dot(a,b): """ efficiently compute the trace of the matrix product of a and b """ - return np.sum(a*b) +def trace_dot(a, b): """ efficiently compute the trace of the matrix product of a and b """ + return np.sum(a * b) def mdot(*args): - """Multiply all the arguments using matrix product rules. - The output is equivalent to multiplying the arguments one by one - from left to right using dot(). - Precedence can be controlled by creating tuples of arguments, - for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)). - Note that this means the output of dot(a,b) and mdot(a,b) will differ if - a or b is a pure tuple of numbers.
+ """ + if len(args) == 1: + return args[0] + elif len(args) == 2: + return _mdot_r(args[0], args[1]) + else: + return _mdot_r(args[:-1], args[-1]) -def _mdot_r(a,b): - """Recursive helper for mdot""" - if type(a)==types.TupleType: - if len(a)>1: - a = mdot(*a) - else: - a = a[0] - if type(b)==types.TupleType: - if len(b)>1: - b = mdot(*b) - else: - b = b[0] - return np.dot(a,b) +def _mdot_r(a, b): + """Recursive helper for mdot""" + if type(a) == types.TupleType: + if len(a) > 1: + a = mdot(*a) + else: + a = a[0] + if type(b) == types.TupleType: + if len(b) > 1: + b = mdot(*b) + else: + b = b[0] + return np.dot(a, b) -def jitchol(A,maxtries=5): +def jitchol(A, maxtries=5): A = np.asfortranarray(A) - L,info = linalg.lapack.flapack.dpotrf(A,lower=1) - if info ==0: + L, info = linalg.lapack.flapack.dpotrf(A, lower=1) + if info == 0: return L else: diagA = np.diag(A) - if np.any(diagA<0.): + if np.any(diagA < 0.): raise linalg.LinAlgError, "not pd: negative diagonal elements" - jitter= diagA.mean()*1e-6 - for i in range(1,maxtries+1): + jitter = diagA.mean() * 1e-6 + for i in range(1, maxtries + 1): print 'Warning: adding jitter of {:.10e}'.format(jitter) try: - return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True) + return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True) except: jitter *= 10 - raise linalg.LinAlgError,"not positive definite, even with jitter." + raise linalg.LinAlgError, "not positive definite, even with jitter." -def jitchol_old(A,maxtries=5): +def jitchol_old(A, maxtries=5): """ :param A : An almost pd square matrix @@ -93,20 +86,20 @@ def jitchol_old(A,maxtries=5): np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T) """ try: - return linalg.cholesky(A, lower = True) + return linalg.cholesky(A, lower=True) except linalg.LinAlgError: diagA = np.diag(A) - if np.any(diagA<0.): + if np.any(diagA < 0.): raise linalg.LinAlgError, "not pd: negative diagonal elements" - jitter= diagA.mean()*1e-6 - for i in range(1,maxtries+1): + jitter = diagA.mean() * 1e-6 + for i in range(1, maxtries + 1): print '\rWarning: adding jitter of {:.10e} '.format(jitter), try: - return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True) + return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True) except: jitter *= 10 - raise linalg.LinAlgError,"not positive definite, even with jitter." + raise linalg.LinAlgError, "not positive definite, even with jitter." 
def pdinv(A, *args): """ @@ -125,7 +118,7 @@ def pdinv(A, *args): logdet = 2.*np.sum(np.log(np.diag(L))) Li = chol_inv(L) Ai, _ = linalg.lapack.flapack.dpotri(L) - #Ai = np.tril(Ai) + np.tril(Ai,-1).T + # Ai = np.tril(Ai) + np.tril(Ai,-1).T symmetrify(Ai) return Ai, L, Li, logdet @@ -140,7 +133,7 @@ def chol_inv(L): """ - return linalg.lapack.flapack.dtrtri(L, lower = True)[0] + return linalg.lapack.flapack.dtrtri(L, lower=True)[0] def multiple_pdinv(A): @@ -155,11 +148,11 @@ def multiple_pdinv(A): hld: 0.5* the log of the determinants of A """ N = A.shape[-1] - chols = [jitchol(A[:,:,i]) for i in range(N)] + chols = [jitchol(A[:, :, i]) for i in range(N)] halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols] - invs = [linalg.lapack.flapack.dpotri(L[0],True)[0] for L in chols] - invs = [np.triu(I)+np.triu(I,1).T for I in invs] - return np.dstack(invs),np.array(halflogdets) + invs = [linalg.lapack.flapack.dpotri(L[0], True)[0] for L in chols] + invs = [np.triu(I) + np.triu(I, 1).T for I in invs] + return np.dstack(invs), np.array(halflogdets) def PCA(Y, input_dim): @@ -179,18 +172,18 @@ def PCA(Y, input_dim): if not np.allclose(Y.mean(axis=0), 0.0): print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" - #Y -= Y.mean(axis=0) + # Y -= Y.mean(axis=0) - Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False) - [X, W] = [Z[0][:,0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:,0:input_dim]] + Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False) + [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]] v = X.std(axis=0) X /= v; W *= v; return X, W.T -def tdot_numpy(mat,out=None): - return np.dot(mat,mat.T,out) +def tdot_numpy(mat, out=None): + return np.dot(mat, mat.T, out) def tdot_blas(mat, out=None): """returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles.""" @@ -198,16 +191,16 @@ def tdot_blas(mat, out=None): return np.dot(mat, mat.T) nn = mat.shape[0] if out is None: - out = np.zeros((nn,nn)) + out = np.zeros((nn, nn)) else: assert(out.dtype == 'float64') - assert(out.shape == (nn,nn)) + assert(out.shape == (nn, nn)) # FIXME: should allow non-contiguous out, and copy output into it: assert(8 in out.strides) # zeroing needed because of dumb way I copy across triangular answer out[:] = 0.0 - ## Call to DSYRK from BLAS + # # Call to DSYRK from BLAS # If already in Fortran order (rare), and has the right sorts of strides I # could avoid the copy. I also thought swapping to cblas API would allow use # of C order. 
However, I tried that and had errors with large matrices: @@ -226,17 +219,17 @@ def tdot_blas(mat, out=None): _blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K), byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC)) - symmetrify(out,upper=True) + symmetrify(out, upper=True) return out def tdot(*args, **kwargs): if _blas_available: - return tdot_blas(*args,**kwargs) + return tdot_blas(*args, **kwargs) else: - return tdot_numpy(*args,**kwargs) + return tdot_numpy(*args, **kwargs) -def DSYR_blas(A,x,alpha=1.): +def DSYR_blas(A, x, alpha=1.): """ Performs a symmetric rank-1 update operation: A <- A + alpha * np.dot(x,x.T) @@ -256,9 +249,9 @@ def DSYR_blas(A,x,alpha=1.): INCX = c_int(1) _blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA), x_, byref(INCX), A_, byref(LDA)) - symmetrify(A,upper=True) + symmetrify(A, upper=True) -def DSYR_numpy(A,x,alpha=1.): +def DSYR_numpy(A, x, alpha=1.): """ Performs a symmetric rank-1 update operation: A <- A + alpha * np.dot(x,x.T) @@ -269,23 +262,23 @@ def DSYR_numpy(A,x,alpha=1.): :param x: Nx1 np.array :param alpha: scalar """ - A += alpha*np.dot(x[:,None],x[None,:]) + A += alpha * np.dot(x[:, None], x[None, :]) def DSYR(*args, **kwargs): if _blas_available: - return DSYR_blas(*args,**kwargs) + return DSYR_blas(*args, **kwargs) else: - return DSYR_numpy(*args,**kwargs) + return DSYR_numpy(*args, **kwargs) -def symmetrify(A,upper=False): +def symmetrify(A, upper=False): """ Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper works IN PLACE. """ - N,M = A.shape - assert N==M + N, M = A.shape + assert N == M c_contig_code = """ int iN; for (int i=1; i """ - code=""" + code = """ double r,c,s; int j,i; for(j=0; j