diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index e49541b0..32b6c02d 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -1,8 +1,9 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from GP import GP -from sparse_GP import sparse_GP from model import * from parameterised import * import priors +from GPy.core.gp import GP +from GPy.core.sparse_gp import SparseGP +from fitc import FITC diff --git a/GPy/models/FITC.py b/GPy/core/fitc.py similarity index 68% rename from GPy/models/FITC.py rename to GPy/core/fitc.py index e8078780..604db5e8 100644 --- a/GPy/models/FITC.py +++ b/GPy/core/fitc.py @@ -7,57 +7,62 @@ from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify,pdinv from ..util.plot import gpplot from .. import kern from scipy import stats, linalg -from ..core import sparse_GP +from sparse_gp import SparseGP -def backsub_both_sides(L,X): - """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" - tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1) - return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T +class FITC(SparseGP): + """ + sparse FITC approximation -class FITC(sparse_GP): + :param X: inputs + :type X: np.ndarray (num_data x Q) + :param likelihood: a likelihood instance, containing the observed data + :type likelihood: GPy.likelihood.(Gaussian | EP) + :param kernel : the kernel (covariance function). 
See link kernels + :type kernel: a GPy.kern.kern instance + :param Z: inducing inputs (optional, see note) + :type Z: np.ndarray (M x Q) | None + :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) + :type normalize_(X|Y): bool + """ - def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): - super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X) + def __init__(self, X, likelihood, kernel, Z, normalize_X=False): + SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False) + assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs" def update_likelihood_approximation(self): """ - Approximates a non-gaussian likelihood using Expectation Propagation + Approximates a non-Gaussian likelihood using Expectation Propagation - For a Gaussian (or direct: TODO) likelihood, no iteration is required: + For a Gaussian likelihood, no iteration is required: this function does nothing - - Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP. - The true precison is now 'true_precision' not 'precision'. 
""" - if self.has_uncertain_inputs: - raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" - else: - self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) - self._set_params(self._get_params()) # update the GP + self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) + self._set_params(self._get_params()) # update the GP + + def _compute_kernel_matrices(self): + # kernel computations, using BGPLVM notation + self.Kmm = self.kern.K(self.Z) + self.psi0 = self.kern.Kdiag(self.X) + self.psi1 = self.kern.K(self.Z, self.X) + self.psi2 = None def _computations(self): - #factor Kmm self.Lm = jitchol(self.Kmm) - self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1) + self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1) Lmipsi1 = np.dot(self.Lmi,self.psi1) self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy() self.Diag0 = self.psi0 - np.diag(self.Qnn) - self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #Includes Diag0 in the precision + self.beta_star = self.likelihood.precision/(1. 
+ self.likelihood.precision*self.Diag0[:,None]) #NOTE: beta_star contains Diag0 and the precision self.V_star = self.beta_star * self.likelihood.Y # The rather complex computations of self.A - if self.has_uncertain_inputs: - raise NotImplementedError - else: - if self.likelihood.is_heteroscedastic: - assert self.likelihood.D == 1 - tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N))) - tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) - self.A = tdot(tmp) + tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data))) + tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) + self.A = tdot(tmp) # factor B - self.B = np.eye(self.M) + self.A + self.B = np.eye(self.num_inducing) + self.A self.LB = jitchol(self.B) self.LBi = chol_inv(self.LB) self.psi1V = np.dot(self.psi1, self.V_star) @@ -108,18 +113,12 @@ class FITC(sparse_GP): self._dpsi1_dX_jkj = 0 self._dpsi1_dtheta_jkj = 0 - for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3): + for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3): K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T) - - #Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1 _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:] - - #Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm - self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z) self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z) - self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm ,self.Z) self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:]) @@ -128,7 +127,7 @@ class FITC(sparse_GP): # save computation here. 
self.partial_for_likelihood = None elif self.likelihood.is_heteroscedastic: - raise NotImplementedError, "heteroscedatic derivates not implemented" + raise NotImplementedError, "heteroscedatic derivates not implemented." else: # likelihood is not heterscedatic dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star) @@ -138,14 +137,14 @@ class FITC(sparse_GP): aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1) aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V) - dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise) + dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise) dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T) alpha = mdot(LBiLmipsi1,self.V_star) alpha_ = mdot(LBiLmipsi1.T,alpha) - dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise ) + dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_**2 * dbstar_dnoise ) dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y) dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star) @@ -155,8 +154,8 @@ class FITC(sparse_GP): def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ - A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) - C = -self.D * (np.sum(np.log(np.diag(self.LB)))) + A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) 
+ C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) return A + C + D @@ -165,35 +164,30 @@ class FITC(sparse_GP): return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood))) def dL_dtheta(self): - if self.has_uncertain_inputs: - raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" - else: - dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X) - dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z) - dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z) - dL_dtheta += self._dKmm_dtheta - dL_dtheta += self._dpsi1_dtheta + dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X) + dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z) + dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z) + dL_dtheta += self._dKmm_dtheta + dL_dtheta += self._dpsi1_dtheta return dL_dtheta def dL_dZ(self): - if self.has_uncertain_inputs: - raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" - else: - dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X) - dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z) - dL_dZ += self._dpsi1_dX - dL_dZ += self._dKmm_dX + dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X) + dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm,X=self.Z) + dL_dZ += self._dpsi1_dX + dL_dZ += self._dKmm_dX return dL_dZ - def _raw_predict(self, Xnew, which_parts, full_cov=False): + def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): + assert X_variance_new is None, "FITC model is not defined for handling uncertain inputs." if self.likelihood.is_heteroscedastic: Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten()) self.Diag = self.Diag0 * Iplus_Dprod_i self.P = Iplus_Dprod_i[:,None] * self.psi1.T self.RPT0 = np.dot(self.Lmi,self.psi1) - self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. 
- Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) - self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) + self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) + self.R,info = linalg.lapack.flapack.dtrtrs(self.L,self.Lmi,lower=1) self.RPT = np.dot(self.R,self.P.T) self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) self.w = self.Diag * self.likelihood.v_tilde @@ -210,13 +204,13 @@ class FITC(sparse_GP): # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) # Ci = I + (RPT0)Di(RPT0).T - # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T - # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T + # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T, with D = diag(self.Diag0) + # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T # = I - V.T * V U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) - V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) - C = np.eye(self.M) - np.dot(V.T,V) + V,info = linalg.lapack.flapack.dtrtrs(U,self.RPT0.T,lower=1) + C = np.eye(self.num_inducing) - np.dot(V.T,V) mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) #self.C = C #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T @@ -232,13 +226,13 @@ class FITC(sparse_GP): mu_star = np.dot(KR0T,mu_H) if full_cov: Kxx = self.kern.K(Xnew,which_parts=which_parts) - var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) + var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T)) else: Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) - var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None] + var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T),0))[:,None] return mu_star[:,None],var else: - raise NotImplementedError, "homoscedastic fitc not implemented" + raise NotImplementedError, "Heteroscedastic case not implemented." 
""" Kx = self.kern.K(self.Z, Xnew) mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) diff --git a/GPy/core/GP.py b/GPy/core/gp.py similarity index 88% rename from GPy/core/GP.py rename to GPy/core/gp.py index 04ea7af1..246b8cc9 100644 --- a/GPy/core/GP.py +++ b/GPy/core/gp.py @@ -33,8 +33,8 @@ class GP(GPBase): self._set_params(self._get_params()) def _set_params(self, p): - self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()]) - self.likelihood._set_params(p[self.kern.Nparam_transformed():]) + self.kern._set_params_transformed(p[:self.kern.num_params_transformed()]) + self.likelihood._set_params(p[self.kern.num_params_transformed():]) self.K = self.kern.K(self.X) self.K += self.likelihood.covariance_matrix @@ -46,12 +46,12 @@ class GP(GPBase): #alpha = np.dot(self.Ki, self.likelihood.Y) alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1) - self.dL_dK = 0.5 * (tdot(alpha) - self.D * self.Ki) + self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki) else: #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1) - self.dL_dK = 0.5 * (tmp - self.D * self.Ki) + self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki) def _get_params(self): return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) @@ -89,7 +89,7 @@ class GP(GPBase): model for a new variable Y* = v_tilde/tau_tilde, with a covariance matrix K* = K + diag(1./tau_tilde) plus a normalization term. 
""" - return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z + return -0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z def _log_likelihood_gradients(self): @@ -117,7 +117,7 @@ class GP(GPBase): var = Kxx - np.sum(np.multiply(KiKx, Kx), 0) var = var[:, None] if stop: - debug_this + debug_this # @UndefinedVariable return mu, var def predict(self, Xnew, which_parts='all', full_cov=False): @@ -131,12 +131,12 @@ class GP(GPBase): :type which_parts: ('all', list of bools) :param full_cov: whether to return the folll covariance matrix, or just the diagonal :type full_cov: bool - :rtype: posterior mean, a Numpy array, Nnew x self.D + :rtype: posterior mean, a Numpy array, Nnew x self.input_dim :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise - :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D + :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim - If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. + If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. This is to allow for different normalizations of the output dimensions. """ diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index aa71b550..9188fe6f 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -1,34 +1,34 @@ import numpy as np -import model from .. 
import kern from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D import pylab as pb +from GPy.core.model import Model -class GPBase(model.model): +class GPBase(Model): """ - Gaussian Process model for holding shared behaviour between + Gaussian Process Model for holding shared behaviour between sprase_GP and GP models """ def __init__(self, X, likelihood, kernel, normalize_X=False): self.X = X assert len(self.X.shape) == 2 - self.N, self.input_dim = self.X.shape + self.num_data, self.input_dim = self.X.shape assert isinstance(kernel, kern.kern) self.kern = kernel self.likelihood = likelihood assert self.X.shape[0] == self.likelihood.data.shape[0] - self.N, self.D = self.likelihood.data.shape + self.num_data, self.output_dim = self.likelihood.data.shape if normalize_X: self._Xmean = X.mean(0)[None, :] self._Xstd = X.std(0)[None, :] self.X = (X.copy() - self._Xmean) / self._Xstd else: - self._Xmean = np.zeros((1,self.input_dim)) - self._Xstd = np.ones((1,self.input_dim)) + self._Xmean = np.zeros((1, self.input_dim)) + self._Xstd = np.ones((1, self.input_dim)) - model.model.__init__(self) + Model.__init__(self) # All leaf nodes should call self._set_params(self._get_params()) at # the end @@ -70,7 +70,7 @@ class GPBase(model.model): else: m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True) Ysim = np.random.multivariate_normal(m.flatten(), v, samples) - gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None,], axes=ax) + gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None, ], axes=ax) for i in range(samples): ax.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25) ax.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5) @@ -84,8 +84,8 @@ class GPBase(model.model): Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution) m, v = self._raw_predict(Xnew, which_parts=which_parts) m = m.reshape(resolution, resolution).T - 
ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) + ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable + ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) else: @@ -94,9 +94,9 @@ class GPBase(model.model): def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None): """ TODO: Docstrings! + :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure - """ # TODO include samples if which_data == 'all': @@ -108,27 +108,27 @@ class GPBase(model.model): if self.X.shape[1] == 1: - Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now + Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) - m, var, lower, upper = self.predict(Xnew, which_parts=which_parts) + m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) for d in range(m.shape[1]): - gpplot(Xnew, m[:,d], lower[:,d], upper[:,d],axes=ax) - ax.plot(Xu[which_data], self.likelihood.data[which_data,d], 'kx', mew=1.5) + gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) + ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5) ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper)) ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin) ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) - elif self.X.shape[1] == 2: # FIXME + elif self.X.shape[1] == 2: # FIXME resolution = resolution or 50 - Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) + Xnew, _, _, xmin, xmax = 
x_frame2D(self.X, plot_limits, resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) - m, var, lower, upper = self.predict(Xnew, which_parts=which_parts) + m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) m = m.reshape(resolution, resolution).T - ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) + ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable Yf = self.likelihood.Y.flatten() - ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) diff --git a/GPy/core/model.py b/GPy/core/model.py index 2acb9963..582d7313 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -6,37 +6,32 @@ from .. import likelihoods from ..inference import optimization from ..util.linalg import jitchol from GPy.util.misc import opt_wrapper -from parameterised import parameterised -from scipy import optimize +from parameterised import Parameterised import multiprocessing as mp import numpy as np -import priors -import re -import sys -import pdb from GPy.core.domains import POSITIVE, REAL # import numdifftools as ndt -class model(parameterised): +class Model(Parameterised): def __init__(self): - parameterised.__init__(self) + Parameterised.__init__(self) self.priors = None self.optimization_runs = [] self.sampling_runs = [] - self.preferred_optimizer = 'tnc' - #self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes + self.preferred_optimizer = 'scg' + # self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes def _get_params(self): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError, "this 
needs to be implemented to use the Model class" def _set_params(self, x): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError, "this needs to be implemented to use the Model class" def log_likelihood(self): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError, "this needs to be implemented to use the Model class" def _log_likelihood_gradients(self): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError, "this needs to be implemented to use the Model class" def set_prior(self, regexp, what): """ - Sets priors on the model parameters. + Sets priors on the Model parameters. Arguments --------- @@ -65,7 +60,7 @@ class model(parameterised): if len(tie_matches) > 1: raise ValueError, "cannot place Prior across multiple ties" elif len(tie_matches) == 1: - which = which[:1] # just place a Prior object on the first parameter + which = which[:1] # just place a Prior object on the first parameter # check constraints are okay @@ -95,7 +90,7 @@ class model(parameterised): def get_gradient(self, name, return_names=False): """ - Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. + Get Model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. """ matches = self.grep_param_names(name) if len(matches): @@ -135,7 +130,7 @@ class model(parameterised): def randomize(self): """ - Randomize the model. + Randomize the Model. 
Make this draw from the Prior if one exists, else draw from N(0,1) """ # first take care of all parameters (from N(0,1)) @@ -147,16 +142,16 @@ class model(parameterised): if self.priors is not None: [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None] self._set_params(x) - self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...) + self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...) - def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): + def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): """ - Perform random restarts of the model, and set the model to the best + Perform random restarts of the Model, and set the Model to the best seen solution. If the robust flag is set, exceptions raised during optimizations will - be handled silently. If _all_ runs fail, the model is reset to the + be handled silently. If _all_ runs fail, the Model is reset to the existing parameter values. Notes @@ -179,19 +174,19 @@ class model(parameterised): try: jobs = [] pool = mp.Pool(processes=num_processes) - for i in range(Nrestarts): + for i in range(num_restarts): self.randomize() job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs) jobs.append(job) - pool.close() # signal that no more data coming in - pool.join() # wait for all the tasks to complete + pool.close() # signal that no more data coming in + pool.join() # wait for all the tasks to complete except KeyboardInterrupt: print "Ctrl+c received, terminating and joining pool." 
pool.terminate() pool.join() - for i in range(Nrestarts): + for i in range(num_restarts): try: if not parallel: self.randomize() @@ -200,10 +195,10 @@ class model(parameterised): self.optimization_runs.append(jobs[i].get()) if verbose: - print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt)) + print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt)) except Exception as e: if robust: - print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts)) + print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts)) else: raise e @@ -218,20 +213,16 @@ class model(parameterised): Ensure that any variables which should clearly be positive have been constrained somehow. """ positive_strings = ['variance', 'lengthscale', 'precision', 'kappa'] - param_names = self._get_param_names() + # param_names = self._get_param_names() currently_constrained = self.all_constrained_indices() to_make_positive = [] for s in positive_strings: - for i in self.grep_param_names(".*"+s): + for i in self.grep_param_names(".*" + s): if not (i in currently_constrained): - #to_make_positive.append(re.escape(param_names[i])) to_make_positive.append(i) if len(to_make_positive): - #self.constrain_positive('(' + '|'.join(to_make_positive) + ')') self.constrain_positive(np.asarray(to_make_positive)) - - def objective_function(self, x): """ The objective function passed to the optimizer. It combines the likelihood and the priors. @@ -244,18 +235,18 @@ class model(parameterised): Gets the gradients from the likelihood and the priors. 
""" self._set_params_transformed(x) - obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) return obj_grads def objective_and_gradients(self, x): self._set_params_transformed(x) obj_f = -self.log_likelihood() - self.log_prior() - obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): """ - Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. + Optimize the Model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. kwargs are passed to the optimizer. They can be: :max_f_eval: maximum number of function evaluations @@ -278,7 +269,7 @@ class model(parameterised): def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs): # assert self.Y.shape[1] > 1, "SGD only works with D > 1" - sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) + sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable sgd.run() self.optimization_runs.append(sgd) @@ -295,7 +286,7 @@ class model(parameterised): def f(x): self._set_params(x) return self.log_likelihood() - h = ndt.Hessian(f) + h = ndt.Hessian(f) # @UndefinedVariable A = -h(x) self._set_params(x) # check for almost zero components on the diagonal which screw up the cholesky @@ -304,7 +295,7 @@ class model(parameterised): return A def Laplace_evidence(self): - """Returns an estiamte of the model evidence based on the Laplace approximation. + """Returns an estiamte of the Model evidence based on the Laplace approximation. 
Uses a numerical estimate of the hessian if none is available analytically""" A = self.Laplace_covariance() try: @@ -314,12 +305,12 @@ class model(parameterised): return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld def __str__(self): - s = parameterised.__str__(self).split('\n') + s = Parameterised.__str__(self).split('\n') # add priors to the string if self.priors is not None: strs = [str(p) if p is not None else '' for p in self.priors] else: - strs = ['']*len(self._get_params()) + strs = [''] * len(self._get_params()) width = np.array(max([len(p) for p in strs] + [5])) + 4 log_like = self.log_likelihood() @@ -340,7 +331,7 @@ class model(parameterised): def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3): """ - Check the gradient of the model by comparing to a numerical estimate. + Check the gradient of the Model by comparing to a numerical estimate. If the verbose flag is passed, invividual components are tested (and printed) :param verbose: If True, print a "full" checking of each parameter @@ -392,7 +383,11 @@ class model(parameterised): if target_param is None: param_list = range(len(x)) else: - param_list = self.grep_param_names(target_param) + param_list = self.grep_param_names(target_param, transformed=True, search=True) + if not np.any(param_list): + print "No free parameters to check" + return + for i in param_list: xx = x.copy() @@ -419,15 +414,15 @@ class model(parameterised): def input_sensitivity(self): """ - return an array describing the sesitivity of the model to each input + return an array describing the sensitivity of the Model to each input NB. Right now, we're basing this on the lengthscales (or variances) of the kernel. TODO: proper sensitivity analysis - where we integrate across the model inputs and evaluate the - effect on the variance of the model output. """ + where we integrate across the Model inputs and evaluate the + effect on the variance of the Model output. 
""" if not hasattr(self, 'kern'): - raise ValueError, "this model has no kernel" + raise ValueError, "this Model has no kernel" k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']] if (not len(k) == 1) or (not k[0].ARD): @@ -474,8 +469,8 @@ class model(parameterised): ll_change = new_ll - last_ll if ll_change < 0: - self.likelihood = last_approximation # restore previous likelihood approximation - self._set_params(last_params) # restore model parameters + self.likelihood = last_approximation # restore previous likelihood approximation + self._set_params(last_params) # restore Model parameters print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change stop = True else: diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index 7afeb1af..b3a5712a 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -6,12 +6,10 @@ import numpy as np import re import copy import cPickle -import os -from ..util.squashers import sigmoid import warnings import transformations -class parameterised(object): +class Parameterised(object): def __init__(self): """ This is the base class for model and kernel. 
Mostly just handles tieing and constraining of parameters @@ -36,7 +34,7 @@ class parameterised(object): """ Returns a **copy** of parameters in non transformed space - :see_also: :py:func:`GPy.core.parameterised.params_transformed` + :see_also: :py:func:`GPy.core.Parameterised.params_transformed` """ return self._get_params() @@ -49,7 +47,7 @@ class parameterised(object): """ Returns a **copy** of parameters in transformed space - :see_also: :py:func:`GPy.core.parameterised.params` + :see_also: :py:func:`GPy.core.Parameterised.params` """ return self._get_params_transformed() @@ -85,7 +83,7 @@ class parameterised(object): else: return self._get_params()[matches] else: - raise AttributeError, "no parameter matches %s" % name + raise AttributeError, "no parameter matches %s" % regexp def __setitem__(self, name, val): """ @@ -113,13 +111,13 @@ class parameterised(object): if hasattr(self, 'prior'): pass - self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value + self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value def untie_everything(self): """Unties all parameters by setting tied_indices to an empty list.""" self.tied_indices = [] - def grep_param_names(self, regexp): + def grep_param_names(self, regexp, transformed=False, search=False): """ :param regexp: regular expression to select parameter names :type regexp: re | str | int @@ -129,15 +127,23 @@ class parameterised(object): Other objects are passed through - i.e. 
integers which weren't meant for grepping """ + if transformed: + names = self._get_param_names_transformed() + else: + names = self._get_param_names() + if type(regexp) in [str, np.string_, np.str]: regexp = re.compile(regexp) - return np.nonzero([regexp.match(name) for name in self._get_param_names()])[0] elif type(regexp) is re._pattern_type: - return np.nonzero([regexp.match(name) for name in self._get_param_names()])[0] + pass else: return regexp + if search: + return np.nonzero([regexp.search(name) for name in names])[0] + else: + return np.nonzero([regexp.match(name) for name in names])[0] - def Nparam_transformed(self): + def num_params_transformed(self): removed = 0 for tie in self.tied_indices: removed += tie.size - 1 @@ -151,18 +157,18 @@ class parameterised(object): """Unconstrain matching parameters. does not untie parameters""" matches = self.grep_param_names(regexp) - #tranformed contraints: + # tranformed contraints: for match in matches: - self.constrained_indices = [i[i<>match] for i in self.constrained_indices] + self.constrained_indices = [i[i <> match] for i in self.constrained_indices] - #remove empty constraints - tmp = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)]) + # remove empty constraints + tmp = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)]) if tmp: - self.constrained_indices, self.constraints = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)]) + self.constrained_indices, self.constraints = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)]) self.constrained_indices, self.constraints = list(self.constrained_indices), list(self.constraints) # fixed: - self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices,values in zip(self.fixed_indices,self.fixed_values)] + self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == 
matches[None, :], 1))[0]) for indices, values in zip(self.fixed_indices, self.fixed_values)] self.fixed_indices = [np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices in self.fixed_indices] # remove empty elements @@ -181,7 +187,7 @@ class parameterised(object): """ Set positive constraints. """ self.constrain(regexp, transformations.logexp()) - def constrain_bounded(self, regexp,lower, upper): + def constrain_bounded(self, regexp, lower, upper): """ Set bounded constraints. """ self.constrain(regexp, transformations.logistic(lower, upper)) @@ -191,8 +197,8 @@ class parameterised(object): else: return np.empty(shape=(0,)) - def constrain(self,regexp,transform): - assert isinstance(transform,transformations.transformation) + def constrain(self, regexp, transform): + assert isinstance(transform, transformations.transformation) matches = self.grep_param_names(regexp) overlap = set(matches).intersection(set(self.all_constrained_indices())) @@ -223,7 +229,6 @@ class parameterised(object): To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes """ matches = self.grep_param_names(regexp) - overlap = set(matches).intersection(set(self.all_constrained_indices())) if overlap: self.unconstrain(np.asarray(list(overlap))) @@ -244,7 +249,7 @@ class parameterised(object): def _get_params_transformed(self): """use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed""" x = self._get_params() - [np.put(x,i,t.finv(x[i])) for i,t in zip(self.constrained_indices,self.constraints)] + [np.put(x, i, t.finv(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] to_remove = self.fixed_indices + [t[1:] for t in self.tied_indices] if len(to_remove): @@ -256,7 +261,7 @@ class parameterised(object): """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), 
and then call self._set_params""" self._set_params(self._untransform_params(x)) - def _untransform_params(self,x): + def _untransform_params(self, x): """ The transformation required for _set_params_transformed. @@ -283,9 +288,9 @@ class parameterised(object): [np.put(xx, i, v) for i, v in zip(self.fixed_indices, self.fixed_values)] [np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ] - [np.put(xx,i,t.f(xx[i])) for i,t in zip(self.constrained_indices, self.constraints)] - if hasattr(self,'debug'): - stop + [np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)] + if hasattr(self, 'debug'): + stop # @UndefinedVariable return xx @@ -309,7 +314,7 @@ class parameterised(object): remove = np.hstack((remove, np.hstack(self.fixed_indices))) # add markers to show that some variables are constrained - for i,t in zip(self.constrained_indices,self.constraints): + for i, t in zip(self.constrained_indices, self.constraints): for ii in i: n[ii] = n[ii] + t.__str__() @@ -326,10 +331,10 @@ class parameterised(object): if not N: return "This object has no free parameters." 
header = ['Name', 'Value', 'Constraints', 'Ties'] - values = self._get_params() # map(str,self._get_params()) + values = self._get_params() # map(str,self._get_params()) # sort out the constraints constraints = [''] * len(names) - for i,t in zip(self.constrained_indices,self.constraints): + for i, t in zip(self.constrained_indices, self.constraints): for ii in i: constraints[ii] = t.__str__() for i in self.fixed_indices: @@ -347,7 +352,7 @@ class parameterised(object): max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])]) max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])]) cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4 - columns = cols.sum() + # columns = cols.sum() header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))] header_string = map(lambda x: '|'.join(x), [header_string]) diff --git a/GPy/core/priors.py b/GPy/core/priors.py index 7b6379de..43090ae3 100644 --- a/GPy/core/priors.py +++ b/GPy/core/priors.py @@ -99,9 +99,9 @@ class MultivariateGaussian: assert len(self.var.shape) == 2 assert self.var.shape[0] == self.var.shape[1] assert self.var.shape[0] == self.mu.size - self.D = self.mu.size + self.input_dim = self.mu.size self.inv, self.hld = pdinv(self.var) - self.constant = -0.5 * self.D * np.log(2 * np.pi) - self.hld + self.constant = -0.5 * self.input_dim * np.log(2 * np.pi) - self.hld def summary(self): raise NotImplementedError @@ -121,7 +121,7 @@ class MultivariateGaussian: return np.random.multivariate_normal(self.mu, self.var, n) def plot(self): - if self.D == 2: + if self.input_dim == 2: rvs = self.rvs(200) pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5) xmin, xmax = pb.xlim() diff --git a/GPy/core/sparse_GP.py b/GPy/core/sparse_gp.py similarity index 82% rename from GPy/core/sparse_GP.py rename to GPy/core/sparse_gp.py index c4fe6763..2cfc8ae4 100644 --- a/GPy/core/sparse_GP.py +++ b/GPy/core/sparse_gp.py @@ -8,22 +8,22 @@ from 
scipy import linalg from ..likelihoods import Gaussian from gp_base import GPBase -class sparse_GP(GPBase): +class SparseGP(GPBase): """ Variational sparse GP model :param X: inputs - :type X: np.ndarray (N x input_dim) + :type X: np.ndarray (num_data x input_dim) :param likelihood: a likelihood instance, containing the observed data :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) :param kernel : the kernel (covariance function). See link kernels :type kernel: a GPy.kern.kern instance :param X_variance: The uncertainty in the measurements of X (Gaussian variance) - :type X_variance: np.ndarray (N x input_dim) | None + :type X_variance: np.ndarray (num_data x input_dim) | None :param Z: inducing inputs (optional, see note) - :type Z: np.ndarray (M x input_dim) | None - :param M : Number of inducing points (optional, default 10. Ignored if Z is not None) - :type M: int + :type Z: np.ndarray (num_inducing x input_dim) | None + :param num_inducing : Number of inducing points (optional, default 10. 
Ignored if Z is not None) + :type num_inducing: int :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :type normalize_(X|Y): bool """ @@ -32,7 +32,7 @@ class sparse_GP(GPBase): GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) self.Z = Z - self.M = Z.shape[0] + self.num_inducing = Z.shape[0] self.likelihood = likelihood if X_variance is None: @@ -69,7 +69,7 @@ class sparse_GP(GPBase): # The rather complex computations of self.A if self.has_uncertain_inputs: if self.likelihood.is_heteroscedastic: - psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0) + psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0) else: psi2_beta = self.psi2.sum(0) * self.likelihood.precision evals, evecs = linalg.eigh(psi2_beta) @@ -77,7 +77,7 @@ class sparse_GP(GPBase): tmp = evecs * np.sqrt(clipped_evals) else: if self.likelihood.is_heteroscedastic: - tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N))) + tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.num_data))) else: tmp = self.psi1 * (np.sqrt(self.likelihood.precision)) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) @@ -85,7 +85,7 @@ class sparse_GP(GPBase): # factor B - self.B = np.eye(self.M) + self.A + self.B = np.eye(self.num_inducing) + self.A self.LB = jitchol(self.B) # TODO: make a switch for either first compute psi1V, or VV.T @@ -99,28 +99,28 @@ class sparse_GP(GPBase): # Compute dL_dKmm tmp = tdot(self._LBi_Lmi_psi1V) - self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D * np.eye(self.M) + tmp) + self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp) tmp = -0.5 * self.DBi_plus_BiPBi - tmp += -0.5 * self.B * self.D - tmp += self.D * np.eye(self.M) + tmp += -0.5 * self.B * self.output_dim + tmp += self.output_dim * 
np.eye(self.num_inducing) self.dL_dKmm = backsub_both_sides(self.Lm, tmp) # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case - self.dL_dpsi0 = -0.5 * self.D * (self.likelihood.precision * np.ones([self.N, 1])).flatten() + self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten() self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T) - dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.D * np.eye(self.M) - self.DBi_plus_BiPBi) + dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi) if self.likelihood.is_heteroscedastic: if self.has_uncertain_inputs: self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :] else: - self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N)) + self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.num_data)) self.dL_dpsi2 = None else: dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta if self.has_uncertain_inputs: # repeat for each of the N psi_2 matrices - self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0) + self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0) else: # subsume back into psi1 (==Kmn) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1) @@ -135,26 +135,26 @@ class sparse_GP(GPBase): raise NotImplementedError, "heteroscedatic derivates not implemented" else: # likelihood is not heterscedatic - self.partial_for_likelihood = -0.5 * self.N * self.D * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 - self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) + self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * 
self.likelihood.precision ** 2 + self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ if self.likelihood.is_heteroscedastic: - A = -0.5 * self.N * self.D * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) - B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) + A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) + B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) else: - A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT - B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) - C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) + A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT + B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) + C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) return A + B + C + D + self.likelihood.Z def _set_params(self, p): - self.Z = p[:self.M * self.input_dim].reshape(self.M, self.input_dim) - self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) - self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) + 
self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim) + self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params]) + self.likelihood._set_params(p[self.Z.size + self.kern.num_params:]) self._compute_kernel_matrices() self._computations() @@ -221,7 +221,7 @@ class sparse_GP(GPBase): Bi, _ = linalg.lapack.flapack.dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! symmetrify(Bi) - Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.M) - Bi) + Kmmi_LmiBLmi = backsub_both_sides(self.Lm, np.eye(self.num_inducing) - Bi) if X_variance_new is None: Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) @@ -259,12 +259,12 @@ class sparse_GP(GPBase): :type which_parts: ('all', list of bools) :param full_cov: whether to return the folll covariance matrix, or just the diagonal :type full_cov: bool - :rtype: posterior mean, a Numpy array, Nnew x self.D + :rtype: posterior mean, a Numpy array, Nnew x self.input_dim :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise - :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D + :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim - If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. + If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. This is to allow for different normalizations of the output dimensions. 
""" diff --git a/GPy/examples/__init__.py b/GPy/examples/__init__.py index 551bff54..00bdab67 100644 --- a/GPy/examples/__init__.py +++ b/GPy/examples/__init__.py @@ -4,5 +4,5 @@ import classification import regression import dimensionality_reduction -import non_gaussian +import non_Gaussian import tutorials diff --git a/GPy/examples/classification.py b/GPy/examples/classification.py index a1be1cef..bff8dcd1 100644 --- a/GPy/examples/classification.py +++ b/GPy/examples/classification.py @@ -24,7 +24,7 @@ def crescent_data(seed=default_seed): # FIXME Y = data['Y'] Y[Y.flatten()==-1] = 0 - m = GPy.models.GP_classification(data['X'], Y) + m = GPy.models.GPClassification(data['X'], Y) m.ensure_default_constraints() m.update_likelihood_approximation() m.optimize() @@ -41,7 +41,7 @@ def oil(): Y[Y.flatten()==-1] = 0 # Create GP model - m = GPy.models.GP_classification(data['X'], Y) + m = GPy.models.GPClassification(data['X'], Y) # Contrain all parameters to be positive m.constrain_positive('') @@ -66,7 +66,7 @@ def toy_linear_1d_classification(seed=default_seed): Y[Y.flatten() == -1] = 0 # Model definition - m = GPy.models.GP_classification(data['X'], Y) + m = GPy.models.GPClassification(data['X'], Y) m.ensure_default_constraints() # Optimize @@ -95,7 +95,7 @@ def sparse_toy_linear_1d_classification(seed=default_seed): Y[Y.flatten() == -1] = 0 # Model definition - m = GPy.models.sparse_GP_classification(data['X'], Y) + m = GPy.models.SparseGPClassification(data['X'], Y) m['.*len']= 2. m.ensure_default_constraints() @@ -114,7 +114,8 @@ def sparse_toy_linear_1d_classification(seed=default_seed): return m def sparse_crescent_data(inducing=10, seed=default_seed): - """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. + """ + Run a Gaussian process classification with DTC approxiamtion on the crescent data. 
The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. :param model_type: type of model to fit ['Full', 'FITC', 'DTC']. :param seed : seed value for data generation. @@ -127,7 +128,7 @@ def sparse_crescent_data(inducing=10, seed=default_seed): Y = data['Y'] Y[Y.flatten()==-1]=0 - m = GPy.models.sparse_GP_classification(data['X'], Y) + m = GPy.models.SparseGPClassification(data['X'], Y) m.ensure_default_constraints() m['.*len'] = 10. m.update_likelihood_approximation() @@ -135,3 +136,33 @@ def sparse_crescent_data(inducing=10, seed=default_seed): print(m) m.plot() return m + +def FITC_crescent_data(inducing=10, seed=default_seed): + """ + Run a Gaussian process classification with FITC approximation on the crescent data. The demonstration uses EP to approximate the likelihood. + + :param model_type: type of model to fit ['Full', 'FITC', 'DTC']. + :param seed : seed value for data generation. + :type seed: int + :param inducing : number of inducing variables (only used for 'FITC' or 'DTC'). + :type inducing: int + """ + + data = GPy.util.datasets.crescent_data(seed=seed) + Y = data['Y'] + Y[Y.flatten()==-1]=0 + + + data = GPy.util.datasets.crescent_data(seed=seed) + Y = data['Y'] + Y[Y.flatten()==-1]=0 + + m = GPy.models.FITCClassification(data['X'], Y) + m.ensure_default_constraints() + m['.*len'] = 3. 
+ m.update_likelihood_approximation() + m.optimize() + print(m) + m.plot() + return m + diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 5e3eb964..ec6d2ca6 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -5,29 +5,28 @@ import numpy as np from matplotlib import pyplot as plt import GPy -from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM -from GPy.util.datasets import swiss_roll_generated from GPy.core.transformations import logexp +from GPy.models.bayesian_gplvm import BayesianGPLVM default_seed = np.random.seed(123344) def BGPLVM(seed=default_seed): N = 10 - M = 3 + num_inducing = 3 Q = 2 D = 4 # generate GPLVM-like data X = np.random.rand(N, Q) k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N), K, D).T + Y = np.random.multivariate_normal(np.zeros(N), K, Q).T k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q) # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M) + m = GPy.models.BayesianGPLVM(Y, Q, kernel=k, num_inducing=num_inducing) m.constrain_positive('(rbf|bias|noise|white|S)') # m.constrain_fixed('S', 1) @@ -63,8 +62,8 @@ def GPLVM_oil_100(optimize=True): m.plot_latent(labels=m.data_labels) return m -def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False): - from GPy.util.datasets import swiss_roll +def swiss_roll(optimize=True, N=1000, num_inducing=15, Q=4, sigma=.2, plot=False): + from GPy.util.datasets import swiss_roll_generated from GPy.core.transformations import logexp_clipped data = swiss_roll_generated(N=N, sigma=sigma) @@ -101,24 +100,24 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False): S = (var * 
np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2, - (1 - var), (1 - var))) + .001 - Z = np.random.permutation(X)[:M] + Z = np.random.permutation(X)[:num_inducing] kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2)) - m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel) + m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c m.data_t = t - m.constrain('variance|length', logexp_clipped()) - m['lengthscale'] = 1. # X.var(0).max() / X.var(0) - m['noise'] = Y.var() / 100. m.ensure_default_constraints() + m['rbf_lengthscale'] = 1. # X.var(0).max() / X.var(0) + m['noise_variance'] = Y.var() / 100. + m['bias_variance'] = 0.05 if optimize: m.optimize('scg', messages=1) return m -def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k): +def BGPLVM_oil(optimize=True, N=100, Q=5, num_inducing=25, max_f_eval=4e3, plot=False, **k): np.random.seed(0) data = GPy.util.datasets.oil() from GPy.core.transformations import logexp_clipped @@ -129,7 +128,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k) Yn = Y - Y.mean(0) Yn /= Yn.std(0) - m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k) + m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) # m.constrain('variance|leng', logexp_clipped()) @@ -168,7 +167,7 @@ def oil_100(): -def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False): +def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): x = np.linspace(0, 4 * np.pi, N)[:, None] s1 = np.vectorize(lambda x: np.sin(x)) s2 = np.vectorize(lambda x: np.cos(x)) @@ -228,13 +227,13 @@ def bgplvm_simulation_matlab_compare(): Y = sim_data['Y'] S = sim_data['S'] mu = sim_data['mu'] - M, [_, Q] = 3, mu.shape + num_inducing, [_, Q] = 3, mu.shape from GPy.models import mrd from GPy import kern reload(mrd); 
reload(kern) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) - m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, + m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, # X=mu, # X_variance=S, _debug=False) @@ -248,8 +247,8 @@ def bgplvm_simulation(optimize='scg', plot=True, max_f_eval=2e4): # from GPy.core.transformations import logexp_clipped - D1, D2, D3, N, M, Q = 15, 8, 8, 100, 3, 5 - slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot) + D1, D2, D3, N, num_inducing, Q = 15, 8, 8, 100, 3, 5 + slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot) from GPy.models import mrd from GPy import kern @@ -259,7 +258,7 @@ def bgplvm_simulation(optimize='scg', Y = Ylist[0] k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) - m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True) + m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True) # m.constrain('variance|noise', logexp_clipped()) m.ensure_default_constraints() m['noise'] = Y.var() / 100. 
@@ -276,8 +275,8 @@ def bgplvm_simulation(optimize='scg', return m def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): - D1, D2, D3, N, M, Q = 150, 200, 400, 500, 3, 7 - slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim) + D1, D2, D3, N, num_inducing, Q = 150, 200, 400, 500, 3, 7 + slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) from GPy.models import mrd from GPy import kern @@ -285,7 +284,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): reload(mrd); reload(kern) k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) - m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="", initz='permute', **kw) + m = mrd.MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) for i, Y in enumerate(Ylist): m['{}_noise'.format(i + 1)] = Y.var() / 100. @@ -297,7 +296,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): if optimize: print "Optimizing Model:" - m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4) + m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4, gtol=.05) if plot: m.plot_X_1d("MRD Latent Space 1D") m.plot_scales("MRD Scales") @@ -313,7 +312,7 @@ def brendan_faces(): Yn /= Yn.std() m = GPy.models.GPLVM(Yn, Q) - # m = GPy.models.Bayesian_GPLVM(Yn, Q, M=100) + # m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100) # optimize m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) @@ -377,16 +376,16 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True): # X /= X.std(axis=0) # # Q = 10 -# M = 30 +# num_inducing = 30 # # kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) -# m = GPy.models.Bayesian_GPLVM(X, Q, kernel=kernel, M=M) +# m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, num_inducing=num_inducing) # # m.scale_factor = 100.0 # m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)') # from 
sklearn import cluster -# km = cluster.KMeans(M, verbose=10) +# km = cluster.KMeans(num_inducing, verbose=10) # Z = km.fit(m.X).cluster_centers_ -# # Z = GPy.util.misc.kmm_init(m.X, M) +# # Z = GPy.util.misc.kmm_init(m.X, num_inducing) # m.set('iip', Z) # m.set('bias', 1e-4) # # optimize diff --git a/GPy/examples/non_gaussian.py b/GPy/examples/non_Gaussian.py similarity index 100% rename from GPy/examples/non_gaussian.py rename to GPy/examples/non_Gaussian.py diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index fd2e85d4..a683f6bb 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -10,71 +10,71 @@ import numpy as np import GPy -def toy_rbf_1d(max_nb_eval_optim=100): +def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d() - # create simple GP model - m = GPy.models.GP_regression(data['X'],data['Y']) + # create simple GP Model + m = GPy.models.GPRegression(data['X'],data['Y']) # optimize m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(optimizer, max_f_eval=max_nb_eval_optim) # plot m.plot() print(m) return m -def rogers_girolami_olympics(max_nb_eval_optim=100): +def rogers_girolami_olympics(optim_iters=100): """Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" data = GPy.util.datasets.rogers_girolami_olympics() - # create simple GP model - m = GPy.models.GP_regression(data['X'],data['Y']) + # create simple GP Model + m = GPy.models.GPRegression(data['X'],data['Y']) #set the lengthscale to be something sensible (defaults to 1) m['rbf_lengthscale'] = 10 # optimize m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(max_f_eval=optim_iters) # plot m.plot(plot_limits = (1850, 2050)) print(m) return m -def toy_rbf_1d_50(max_nb_eval_optim=100): +def 
toy_rbf_1d_50(optim_iters=100): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d_50() - # create simple GP model - m = GPy.models.GP_regression(data['X'],data['Y']) + # create simple GP Model + m = GPy.models.GPRegression(data['X'],data['Y']) # optimize m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(max_f_eval=optim_iters) # plot m.plot() print(m) return m -def silhouette(max_nb_eval_optim=100): +def silhouette(optim_iters=100): """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper.""" data = GPy.util.datasets.silhouette() - # create simple GP model - m = GPy.models.GP_regression(data['X'],data['Y']) + # create simple GP Model + m = GPy.models.GPRegression(data['X'],data['Y']) # optimize m.ensure_default_constraints() - m.optimize(messages=True,max_f_eval=max_nb_eval_optim) + m.optimize(messages=True,max_f_eval=optim_iters) print(m) return m -def coregionalisation_toy2(max_nb_eval_optim=100): +def coregionalisation_toy2(optim_iters=100): """ A simple demonstration of coregionalisation on two sinusoidal functions. """ @@ -87,13 +87,13 @@ def coregionalisation_toy2(max_nb_eval_optim=100): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) - k2 = GPy.kern.coregionalise(2,1) + k2 = GPy.kern.Coregionalise(2,1) k = k1.prod(k2,tensor=True) - m = GPy.models.GP_regression(X,Y,kernel=k) + m = GPy.models.GPRegression(X,Y,kernel=k) m.constrain_fixed('.*rbf_var',1.) 
#m.constrain_positive('.*kappa') m.ensure_default_constraints() - m.optimize('sim',messages=1,max_f_eval=max_nb_eval_optim) + m.optimize('sim',messages=1,max_f_eval=optim_iters) pb.figure() Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) @@ -106,7 +106,7 @@ def coregionalisation_toy2(max_nb_eval_optim=100): pb.plot(X2[:,0],Y2[:,0],'gx',mew=2) return m -def coregionalisation_toy(max_nb_eval_optim=100): +def coregionalisation_toy(optim_iters=100): """ A simple demonstration of coregionalisation on two sinusoidal functions. """ @@ -119,13 +119,13 @@ def coregionalisation_toy(max_nb_eval_optim=100): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) - k2 = GPy.kern.coregionalise(2,2) + k2 = GPy.kern.Coregionalise(2,2) k = k1.prod(k2,tensor=True) - m = GPy.models.GP_regression(X,Y,kernel=k) + m = GPy.models.GPRegression(X,Y,kernel=k) m.constrain_fixed('.*rbf_var',1.) #m.constrain_positive('kappa') m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(max_f_eval=optim_iters) pb.figure() Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) @@ -139,7 +139,7 @@ def coregionalisation_toy(max_nb_eval_optim=100): return m -def coregionalisation_sparse(max_nb_eval_optim=100): +def coregionalisation_sparse(optim_iters=100): """ A simple demonstration of coregionalisation on two sinusoidal functions using sparse approximations. """ @@ -151,21 +151,21 @@ def coregionalisation_sparse(max_nb_eval_optim=100): Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05 Y = np.vstack((Y1,Y2)) - M = 40 - Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None])) + num_inducing = 40 + Z = np.hstack((np.random.rand(num_inducing,1)*8,np.random.randint(0,2,num_inducing)[:,None])) k1 = GPy.kern.rbf(1) - k2 = GPy.kern.coregionalise(2,2) + k2 = GPy.kern.Coregionalise(2,2) k = k1.prod(k2,tensor=True) + GPy.kern.white(2,0.001) - m = GPy.models.sparse_GP_regression(X,Y,kernel=k,Z=Z) - m.scale_factor = 10000. 
+ m = GPy.models.SparseGPRegression(X,Y,kernel=k,Z=Z) m.constrain_fixed('.*rbf_var',1.) - #m.constrain_positive('kappa') m.constrain_fixed('iip') + m.constrain_bounded('noise_variance',1e-3,1e-1) m.ensure_default_constraints() - m.optimize_restarts(5, robust=True, messages=1, max_f_eval=max_nb_eval_optim) + m.optimize_restarts(5, robust=True, messages=1, max_f_eval=optim_iters) + #plotting: pb.figure() Xtest1 = np.hstack((np.linspace(0,9,100)[:,None],np.zeros((100,1)))) Xtest2 = np.hstack((np.linspace(0,9,100)[:,None],np.ones((100,1)))) @@ -181,7 +181,7 @@ def coregionalisation_sparse(max_nb_eval_optim=100): return m -def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, max_nb_eval_optim=100): +def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000, optim_iters=300): """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisey mode is higher.""" # Contour over a range of length scales and signal/noise ratios. 
@@ -197,7 +197,7 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 data['Y'] = data['Y'] - np.mean(data['Y']) lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) - pb.contour(length_scales, log_SNRs, np.exp(lls), 20) + pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) ax = pb.gca() pb.xlabel('length scale') pb.ylabel('log_10 SNR') @@ -211,18 +211,20 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 optim_point_y = np.empty(2) np.random.seed(seed=seed) for i in range(0, model_restarts): - kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.)) + #kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3,1), lengthscale=np.random.uniform(5,50)) - m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern) - optim_point_x[0] = m.get('rbf_lengthscale') - optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); + m = GPy.models.GPRegression(data['X'],data['Y'], kernel=kern) + m['noise_variance'] = np.random.uniform(1e-3,1) + optim_point_x[0] = m['rbf_lengthscale'] + optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); # optimize m.ensure_default_constraints() - m.optimize(xtol=1e-6, ftol=1e-6, max_f_eval=max_nb_eval_optim) + m.optimize('scg', xtol=1e-6, ftol=1e-6, max_f_eval=optim_iters) - optim_point_x[1] = m.get('rbf_lengthscale') - optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance')); + optim_point_x[1] = m['rbf_lengthscale'] + optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, 
head_width=0.5, fc='k', ec='k') models.append(m) @@ -231,42 +233,35 @@ def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000 ax.set_ylim(ylim) return (models, lls) -def _contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf): +def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): """Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales. :data_set: A data set from the utils.datasets director. :length_scales: a list of length scales to explore for the contour plot. :log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot. - :signal_kernel: a kernel to use for the 'signal' portion of the data.""" + :kernel: a kernel to use for the 'signal' portion of the data.""" lls = [] total_var = np.var(data['Y']) + kernel = kernel_call(1, variance=1., lengthscale=1.) + Model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel) for log_SNR in log_SNRs: - SNR = 10**log_SNR + SNR = 10.**log_SNR + noise_var = total_var/(1.+SNR) + signal_var = total_var - noise_var + Model.kern['.*variance'] = signal_var + Model['noise_variance'] = noise_var length_scale_lls = [] + for length_scale in length_scales: - noise_var = 1. 
- signal_var = SNR - noise_var = noise_var/(noise_var + signal_var)*total_var - signal_var = signal_var/(noise_var + signal_var)*total_var + Model['.*lengthscale'] = length_scale + length_scale_lls.append(Model.log_likelihood()) - signal_kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) - noise_kernel = GPy.kern.white(1, variance=noise_var) - kernel = signal_kernel + noise_kernel - K = kernel.K(data['X']) - total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0] - noise_var *= total_var - signal_var *= total_var - - kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var) - - model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel) - model.constrain_positive('') - length_scale_lls.append(model.log_likelihood()) lls.append(length_scale_lls) + return np.array(lls) -def sparse_GP_regression_1D(N = 400, M = 5, max_nb_eval_optim=100): +def sparse_GP_regression_1D(N = 400, num_inducing = 5, optim_iters=100): """Run a 1D example of a sparse GP regression.""" # sample inputs and outputs X = np.random.uniform(-3.,3.,(N,1)) @@ -275,17 +270,17 @@ def sparse_GP_regression_1D(N = 400, M = 5, max_nb_eval_optim=100): rbf = GPy.kern.rbf(1) noise = GPy.kern.white(1) kernel = rbf + noise - # create simple GP model - m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M) + # create simple GP Model + m = GPy.models.SparseGPRegression(X, Y, kernel, num_inducing=num_inducing) m.ensure_default_constraints() m.checkgrad(verbose=1) - m.optimize('tnc', messages = 1, max_f_eval=max_nb_eval_optim) + m.optimize('tnc', messages = 1, max_f_eval=optim_iters) m.plot() return m -def sparse_GP_regression_2D(N = 400, M = 50, max_nb_eval_optim=100): +def sparse_GP_regression_2D(N = 400, num_inducing = 50, optim_iters=100): """Run a 2D example of a sparse GP regression.""" X = np.random.uniform(-3.,3.,(N,2)) Y = np.sin(X[:,0:1]) * 
np.sin(X[:,1:2])+np.random.randn(N,1)*0.05 @@ -295,8 +290,8 @@ def sparse_GP_regression_2D(N = 400, M = 50, max_nb_eval_optim=100): noise = GPy.kern.white(2) kernel = rbf + noise - # create simple GP model - m = GPy.models.sparse_GP_regression(X,Y,kernel, M = M) + # create simple GP Model + m = GPy.models.SparseGPRegression(X,Y,kernel, num_inducing = num_inducing) # contrain all parameters to be positive (but not inducing inputs) m.ensure_default_constraints() @@ -305,13 +300,12 @@ def sparse_GP_regression_2D(N = 400, M = 50, max_nb_eval_optim=100): m.checkgrad() # optimize and plot - pb.figure() - m.optimize('tnc', messages = 1, max_f_eval=max_nb_eval_optim) + m.optimize('tnc', messages = 1, max_f_eval=optim_iters) m.plot() print(m) return m -def uncertain_inputs_sparse_regression(max_nb_eval_optim=100): +def uncertain_inputs_sparse_regression(optim_iters=100): """Run a 1D example of a sparse GP regression with uncertain inputs.""" fig, axes = pb.subplots(1,2,figsize=(12,5)) @@ -324,18 +318,18 @@ def uncertain_inputs_sparse_regression(max_nb_eval_optim=100): k = GPy.kern.rbf(1) + GPy.kern.white(1) - # create simple GP model - no input uncertainty on this one - m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z) + # create simple GP Model - no input uncertainty on this one + m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) m.ensure_default_constraints() - m.optimize('scg', messages=1, max_f_eval=max_nb_eval_optim) + m.optimize('scg', messages=1, max_f_eval=optim_iters) m.plot(ax=axes[0]) axes[0].set_title('no input uncertainty') - #the same model with uncertainty - m = GPy.models.sparse_GP_regression(X, Y, kernel=k, Z=Z, X_variance=S) + #the same Model with uncertainty + m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S) m.ensure_default_constraints() - m.optimize('scg', messages=1, max_f_eval=max_nb_eval_optim) + m.optimize('scg', messages=1, max_f_eval=optim_iters) m.plot(ax=axes[1]) axes[1].set_title('with input uncertainty') 
print(m) diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py index bb5192d8..fc33d2bc 100644 --- a/GPy/examples/tutorials.py +++ b/GPy/examples/tutorials.py @@ -19,7 +19,7 @@ def tuto_GP_regression(): kernel = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.) - m = GPy.models.GP_regression(X,Y,kernel) + m = GPy.models.GPRegression(X, Y, kernel) print m m.plot() @@ -46,7 +46,7 @@ def tuto_GP_regression(): ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2) # create simple GP model - m = GPy.models.GP_regression(X,Y,ker) + m = GPy.models.GPRegression(X, Y, ker) # contrain all parameters to be positive m.constrain_positive('') @@ -114,7 +114,12 @@ def tuto_kernel_overview(): Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:]) # Create GP regression model +<<<<<<< HEAD m = GPy.models.GP_regression(X,Y,Kanova) +======= + m = GPy.models.GPRegression(X, Y, Kanova) + pb.figure(figsize=(5,5)) +>>>>>>> efbf169a6a17d824234d538553ffcbe0c4bddc40 m.plot() pb.figure(figsize=(20,3)) @@ -140,5 +145,5 @@ def model_interaction(): X = np.random.randn(20,1) Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5. k = GPy.kern.rbf(1) + GPy.kern.bias(1) - return GPy.models.GP_regression(X,Y,kernel=k) + return GPy.models.GPRegression(X, Y, kernel=k) diff --git a/GPy/inference/optimization.py b/GPy/inference/optimization.py index e208392b..433d5f41 100644 --- a/GPy/inference/optimization.py +++ b/GPy/inference/optimization.py @@ -1,18 +1,16 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) -import pdb import pylab as pb import datetime as dt from scipy import optimize -import numpy as np try: import rasmussens_minimize as rasm rasm_available = True except ImportError: rasm_available = False -from SCG import SCG +from scg import SCG class Optimizer(): """ @@ -51,9 +49,9 @@ class Optimizer(): start = dt.datetime.now() self.opt(**kwargs) end = dt.datetime.now() - self.time = str(end-start) + self.time = str(end - start) - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): raise NotImplementedError, "this needs to be implemented to use the optimizer class" def plot(self): @@ -78,7 +76,7 @@ class opt_tnc(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "TNC (Scipy implementation)" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): """ Run the TNC optimizer @@ -96,8 +94,8 @@ class opt_tnc(Optimizer): if self.gtol is not None: opt_dict['pgtol'] = self.gtol - opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages = self.messages, - maxfun = self.max_f_eval, **opt_dict) + opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages=self.messages, + maxfun=self.max_f_eval, **opt_dict) self.x_opt = opt_result[0] self.f_opt = f_fp(self.x_opt)[0] self.funct_eval = opt_result[1] @@ -108,7 +106,7 @@ class opt_lbfgsb(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "L-BFGS-B (Scipy implementation)" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): """ Run the optimizer @@ -130,8 +128,8 @@ class opt_lbfgsb(Optimizer): if self.gtol is not None: opt_dict['pgtol'] = self.gtol - opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint = iprint, - maxfun = self.max_f_eval, **opt_dict) + opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint, + maxfun=self.max_f_eval, **opt_dict) self.x_opt = opt_result[0] self.f_opt = 
f_fp(self.x_opt)[0] self.funct_eval = opt_result[2]['funcalls'] @@ -142,12 +140,12 @@ class opt_simplex(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "Nelder-Mead simplex routine (via Scipy)" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): """ The simplex optimizer does not require gradients. """ - statuses = ['Converged', 'Maximum number of function evaluations made','Maximum number of iterations reached'] + statuses = ['Converged', 'Maximum number of function evaluations made', 'Maximum number of iterations reached'] opt_dict = {} if self.xtol is not None: @@ -157,8 +155,8 @@ class opt_simplex(Optimizer): if self.gtol is not None: print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it" - opt_result = optimize.fmin(f, self.x_init, (), disp = self.messages, - maxfun = self.max_f_eval, full_output=True, **opt_dict) + opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages, + maxfun=self.max_f_eval, full_output=True, **opt_dict) self.x_opt = opt_result[0] self.f_opt = opt_result[1] @@ -172,7 +170,7 @@ class opt_rasm(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "Rasmussen's Conjugate Gradient" - def opt(self, f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): """ Run Rasmussen's Conjugate Gradient optimizer """ @@ -189,8 +187,8 @@ class opt_rasm(Optimizer): if self.gtol is not None: print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it" - opt_result = rasm.minimize(self.x_init, f_fp, (), messages = self.messages, - maxnumfuneval = self.max_f_eval) + opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages, + maxnumfuneval=self.max_f_eval) self.x_opt = opt_result[0] self.f_opt = opt_result[1][-1] self.funct_eval = opt_result[2] @@ -203,7 +201,7 @@ class opt_SCG(Optimizer): Optimizer.__init__(self, *args, **kwargs) self.opt_name = "Scaled Conjugate Gradients" - def opt(self, 
f_fp = None, f = None, fp = None): + def opt(self, f_fp=None, f=None, fp=None): assert not f is None assert not fp is None opt_result = SCG(f, fp, self.x_init, display=self.messages, @@ -218,7 +216,7 @@ class opt_SCG(Optimizer): self.status = opt_result[3] def get_optimizer(f_min): - from SGD import opt_SGD + from sgd import opt_SGD optimizers = {'fmin_tnc': opt_tnc, 'simplex': opt_simplex, diff --git a/GPy/inference/SCG.py b/GPy/inference/scg.py similarity index 100% rename from GPy/inference/SCG.py rename to GPy/inference/scg.py diff --git a/GPy/inference/SGD.py b/GPy/inference/sgd.py similarity index 67% rename from GPy/inference/SGD.py rename to GPy/inference/sgd.py index c2a77e40..0002bb22 100644 --- a/GPy/inference/SGD.py +++ b/GPy/inference/sgd.py @@ -11,17 +11,17 @@ class opt_SGD(Optimizer): Optimize using stochastic gradient descent. *** Parameters *** - model: reference to the model object + Model: reference to the Model object iterations: number of iterations learning_rate: learning rate momentum: momentum """ - def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs): + def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, Model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs): self.opt_name = "Stochastic Gradient Descent" - self.model = model + self.Model = Model self.iterations = iterations self.momentum = momentum self.learning_rate = learning_rate @@ -42,17 +42,17 @@ class opt_SGD(Optimizer): self.learning_rate_0 = self.learning_rate.mean() self.schedule = schedule - # if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1: + # if len([p for p in self.Model.kern.parts if p.name == 
'bias']) == 1: # self.param_traces.append(('bias',[])) - # if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1: + # if len([p for p in self.Model.kern.parts if p.name == 'linear']) == 1: # self.param_traces.append(('linear',[])) - # if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1: + # if len([p for p in self.Model.kern.parts if p.name == 'rbf']) == 1: # self.param_traces.append(('rbf_var',[])) self.param_traces = dict(self.param_traces) self.fopt_trace = [] - num_params = len(self.model._get_params()) + num_params = len(self.Model._get_params()) if isinstance(self.learning_rate, float): self.learning_rate = np.ones((num_params,)) * self.learning_rate @@ -84,7 +84,7 @@ class opt_SGD(Optimizer): return (np.isnan(data).sum(axis=1) == 0) def check_for_missing(self, data): - if sp.sparse.issparse(self.model.likelihood.Y): + if sp.sparse.issparse(self.Model.likelihood.Y): return True else: return np.isnan(data).sum() > 0 @@ -107,32 +107,32 @@ class opt_SGD(Optimizer): def shift_constraints(self, j): - constrained_indices = copy.deepcopy(self.model.constrained_indices) + constrained_indices = copy.deepcopy(self.Model.constrained_indices) for c, constraint in enumerate(constrained_indices): mask = (np.ones_like(constrained_indices[c]) == 1) for i in range(len(constrained_indices[c])): pos = np.where(j == constrained_indices[c][i])[0] if len(pos) == 1: - self.model.constrained_indices[c][i] = pos + self.Model.constrained_indices[c][i] = pos else: mask[i] = False - self.model.constrained_indices[c] = self.model.constrained_indices[c][mask] + self.Model.constrained_indices[c] = self.Model.constrained_indices[c][mask] return constrained_indices # back them up - # bounded_i = copy.deepcopy(self.model.constrained_bounded_indices) - # bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers) - # bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers) + # bounded_i = copy.deepcopy(self.Model.constrained_bounded_indices) + # 
bounded_l = copy.deepcopy(self.Model.constrained_bounded_lowers) + # bounded_u = copy.deepcopy(self.Model.constrained_bounded_uppers) # for b in range(len(bounded_i)): # for each group of constraints # for bc in range(len(bounded_i[b])): # pos = np.where(j == bounded_i[b][bc])[0] # if len(pos) == 1: - # pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0] - # self.model.constrained_bounded_indices[b][pos2] = pos[0] + # pos2 = np.where(self.Model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0] + # self.Model.constrained_bounded_indices[b][pos2] = pos[0] # else: - # if len(self.model.constrained_bounded_indices[b]) == 1: + # if len(self.Model.constrained_bounded_indices[b]) == 1: # # if it's the last index to be removed # # the logic here is just a mess. If we remove the last one, then all the # # b-indices change and we have to iterate through everything to find our @@ -140,35 +140,35 @@ class opt_SGD(Optimizer): # raise NotImplementedError # else: # just remove it from the indices - # mask = self.model.constrained_bounded_indices[b] != bc - # self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask] + # mask = self.Model.constrained_bounded_indices[b] != bc + # self.Model.constrained_bounded_indices[b] = self.Model.constrained_bounded_indices[b][mask] # # here we shif the positive constraints. 
We cycle through each positive # # constraint - # positive = self.model.constrained_positive_indices.copy() + # positive = self.Model.constrained_positive_indices.copy() # mask = (np.ones_like(positive) == 1) # for p in range(len(positive)): # # we now check whether the constrained index appears in the j vector # # (the vector of the "active" indices) - # pos = np.where(j == self.model.constrained_positive_indices[p])[0] + # pos = np.where(j == self.Model.constrained_positive_indices[p])[0] # if len(pos) == 1: - # self.model.constrained_positive_indices[p] = pos + # self.Model.constrained_positive_indices[p] = pos # else: # mask[p] = False - # self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask] + # self.Model.constrained_positive_indices = self.Model.constrained_positive_indices[mask] # return (bounded_i, bounded_l, bounded_u), positive def restore_constraints(self, c):#b, p): - # self.model.constrained_bounded_indices = b[0] - # self.model.constrained_bounded_lowers = b[1] - # self.model.constrained_bounded_uppers = b[2] - # self.model.constrained_positive_indices = p - self.model.constrained_indices = c + # self.Model.constrained_bounded_indices = b[0] + # self.Model.constrained_bounded_lowers = b[1] + # self.Model.constrained_bounded_uppers = b[2] + # self.Model.constrained_positive_indices = p + self.Model.constrained_indices = c def get_param_shapes(self, N = None, input_dim = None): - model_name = self.model.__class__.__name__ + model_name = self.Model.__class__.__name__ if model_name == 'GPLVM': return [(N, input_dim)] if model_name == 'Bayesian_GPLVM': @@ -179,37 +179,37 @@ class opt_SGD(Optimizer): def step_with_missing_data(self, f_fp, X, step, shapes): N, input_dim = X.shape - if not sp.sparse.issparse(self.model.likelihood.Y): - Y = self.model.likelihood.Y - samples = self.non_null_samples(self.model.likelihood.Y) - self.model.N = samples.sum() + if not sp.sparse.issparse(self.Model.likelihood.Y): + Y = 
self.Model.likelihood.Y + samples = self.non_null_samples(self.Model.likelihood.Y) + self.Model.N = samples.sum() Y = Y[samples] else: - samples = self.model.likelihood.Y.nonzero()[0] - self.model.N = len(samples) - Y = np.asarray(self.model.likelihood.Y[samples].todense(), dtype = np.float64) + samples = self.Model.likelihood.Y.nonzero()[0] + self.Model.N = len(samples) + Y = np.asarray(self.Model.likelihood.Y[samples].todense(), dtype = np.float64) - if self.model.N == 0 or Y.std() == 0.0: - return 0, step, self.model.N + if self.Model.N == 0 or Y.std() == 0.0: + return 0, step, self.Model.N - self.model.likelihood._offset = Y.mean() - self.model.likelihood._scale = Y.std() - self.model.likelihood.set_data(Y) - # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision + self.Model.likelihood._offset = Y.mean() + self.Model.likelihood._scale = Y.std() + self.Model.likelihood.set_data(Y) + # self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision - sigma = self.model.likelihood._variance - self.model.likelihood._variance = None # invalidate cache - self.model.likelihood._set_params(sigma) + sigma = self.Model.likelihood._variance + self.Model.likelihood._variance = None # invalidate cache + self.Model.likelihood._set_params(sigma) j = self.subset_parameter_vector(self.x_opt, samples, shapes) - self.model.X = X[samples] + self.Model.X = X[samples] - model_name = self.model.__class__.__name__ + model_name = self.Model.__class__.__name__ if model_name == 'Bayesian_GPLVM': - self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) - self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT) + self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T) + self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT) ci = self.shift_constraints(j) f, fp = f_fp(self.x_opt[j]) @@ -218,18 +218,18 @@ class opt_SGD(Optimizer): self.x_opt[j] -= step[j] 
self.restore_constraints(ci) - self.model.grads[j] = fp + self.Model.grads[j] = fp # restore likelihood _offset and _scale, otherwise when we call set_data(y) on # the next feature, it will get normalized with the mean and std of this one. - self.model.likelihood._offset = 0 - self.model.likelihood._scale = 1 + self.Model.likelihood._offset = 0 + self.Model.likelihood._scale = 1 - return f, step, self.model.N + return f, step, self.Model.N def adapt_learning_rate(self, t, D): if self.learning_rate_adaptation == 'adagrad': if t > 0: - g_k = self.model.grads + g_k = self.Model.grads self.s_k += np.square(g_k) t0 = 100.0 self.learning_rate = 0.1/(t0 + np.sqrt(self.s_k)) @@ -245,8 +245,8 @@ class opt_SGD(Optimizer): elif self.learning_rate_adaptation == 'semi_pesky': - if self.model.__class__.__name__ == 'Bayesian_GPLVM': - g_t = self.model.grads + if self.Model.__class__.__name__ == 'Bayesian_GPLVM': + g_t = self.Model.grads if t == 0: self.hbar_t = 0.0 self.tau_t = 100.0 @@ -259,28 +259,28 @@ class opt_SGD(Optimizer): def opt(self, f_fp=None, f=None, fp=None): - self.x_opt = self.model._get_params_transformed() + self.x_opt = self.Model._get_params_transformed() self.grads = [] - X, Y = self.model.X.copy(), self.model.likelihood.Y.copy() + X, Y = self.Model.X.copy(), self.Model.likelihood.Y.copy() - self.model.likelihood.YYT = 0 - self.model.likelihood.trYYT = 0 - self.model.likelihood._offset = 0.0 - self.model.likelihood._scale = 1.0 + self.Model.likelihood.YYT = 0 + self.Model.likelihood.trYYT = 0 + self.Model.likelihood._offset = 0.0 + self.Model.likelihood._scale = 1.0 - N, input_dim = self.model.X.shape - D = self.model.likelihood.Y.shape[1] - num_params = self.model._get_params() + N, input_dim = self.Model.X.shape + D = self.Model.likelihood.Y.shape[1] + num_params = self.Model._get_params() self.trace = [] - missing_data = self.check_for_missing(self.model.likelihood.Y) + missing_data = self.check_for_missing(self.Model.likelihood.Y) step = 
np.zeros_like(num_params) for it in range(self.iterations): if self.actual_iter != None: it = self.actual_iter - self.model.grads = np.zeros_like(self.x_opt) # TODO this is ugly + self.Model.grads = np.zeros_like(self.x_opt) # TODO this is ugly if it == 0 or self.self_paced is False: features = np.random.permutation(Y.shape[1]) @@ -292,29 +292,29 @@ class opt_SGD(Optimizer): NLL = [] import pylab as plt for count, j in enumerate(features): - self.model.D = len(j) - self.model.likelihood.D = len(j) - self.model.likelihood.set_data(Y[:, j]) - # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision + self.Model.input_dim = len(j) + self.Model.likelihood.input_dim = len(j) + self.Model.likelihood.set_data(Y[:, j]) + # self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision - sigma = self.model.likelihood._variance - self.model.likelihood._variance = None # invalidate cache - self.model.likelihood._set_params(sigma) + sigma = self.Model.likelihood._variance + self.Model.likelihood._variance = None # invalidate cache + self.Model.likelihood._set_params(sigma) if missing_data: shapes = self.get_param_shapes(N, input_dim) f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes) else: - self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) - self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT) + self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T) + self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT) Nj = N f, fp = f_fp(self.x_opt) - self.model.grads = fp.copy() + self.Model.grads = fp.copy() step = self.momentum * step + self.learning_rate * fp self.x_opt -= step if self.messages == 2: - noise = self.model.likelihood._variance + noise = self.Model.likelihood._variance status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = 
len(features), f = f, nm = Nj, noise = noise) sys.stdout.write(status) sys.stdout.flush() @@ -328,19 +328,19 @@ class opt_SGD(Optimizer): # plt.plot(self.param_traces['noise']) # for k in self.param_traces.keys(): - # self.param_traces[k].append(self.model.get(k)[0]) - self.grads.append(self.model.grads.tolist()) + # self.param_traces[k].append(self.Model.get(k)[0]) + self.grads.append(self.Model.grads.tolist()) # should really be a sum(), but earlier samples in the iteration will have a very crappy ll self.f_opt = np.mean(NLL) - self.model.N = N - self.model.X = X - self.model.D = D - self.model.likelihood.N = N - self.model.likelihood.D = D - self.model.likelihood.Y = Y - sigma = self.model.likelihood._variance - self.model.likelihood._variance = None # invalidate cache - self.model.likelihood._set_params(sigma) + self.Model.N = N + self.Model.X = X + self.Model.input_dim = D + self.Model.likelihood.N = N + self.Model.likelihood.input_dim = D + self.Model.likelihood.Y = Y + sigma = self.Model.likelihood._variance + self.Model.likelihood._variance = None # invalidate cache + self.Model.likelihood._set_params(sigma) self.trace.append(self.f_opt) if self.iteration_file is not None: diff --git a/GPy/kern/Brownian.py b/GPy/kern/Brownian.py index c5b19653..76e103af 100644 --- a/GPy/kern/Brownian.py +++ b/GPy/kern/Brownian.py @@ -2,26 +2,26 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np def theta(x): """Heavisdie step function""" return np.where(x>=0.,1.,0.) -class Brownian(kernpart): +class Brownian(Kernpart): """ Brownian Motion kernel. - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variance: :type variance: float """ - def __init__(self,D,variance=1.): - self.D = D - assert self.D==1, "Brownian motion in 1D only" - self.Nparam = 1. 
+ def __init__(self,input_dim,variance=1.): + self.input_dim = input_dim + assert self.input_dim==1, "Brownian motion in 1D only" + self.num_params = 1. self.name = 'Brownian' self._set_params(np.array([variance]).flatten()) diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index 9503361d..60f0b6e9 100644 --- a/GPy/kern/Matern32.py +++ b/GPy/kern/Matern32.py @@ -2,22 +2,20 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np -import hashlib -from ..util.linalg import pdinv,mdot from scipy import integrate -class Matern32(kernpart): +class Matern32(Kernpart): """ Matern 3/2 kernel: .. math:: - k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the vector of lengthscale :math:`\ell_i` @@ -28,11 +26,11 @@ class Matern32(kernpart): """ - def __init__(self,D,variance=1.,lengthscale=None,ARD=False): - self.D = D + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False): + self.input_dim = input_dim self.ARD = ARD if ARD == False: - self.Nparam = 2 + self.num_params = 2 self.name = 'Mat32' if lengthscale is not None: lengthscale = np.asarray(lengthscale) @@ -40,78 +38,78 @@ class Matern32(kernpart): else: lengthscale = np.ones(1) else: - self.Nparam = self.D + 1 + self.num_params = self.input_dim + 1 self.name = 'Mat32' if lengthscale is not None: lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.D, "bad number of lengthscales" + assert lengthscale.size == self.input_dim, "bad number of 
lengthscales" else: - lengthscale = np.ones(self.D) - self._set_params(np.hstack((variance,lengthscale.flatten()))) + lengthscale = np.ones(self.input_dim) + self._set_params(np.hstack((variance, lengthscale.flatten()))) def _get_params(self): """return the value of the parameters.""" - return np.hstack((self.variance,self.lengthscale)) + return np.hstack((self.variance, self.lengthscale)) - def _set_params(self,x): + def _set_params(self, x): """set the value of the parameters.""" - assert x.size == self.Nparam + assert x.size == self.num_params self.variance = x[0] self.lengthscale = x[1:] def _get_param_names(self): """return parameter names.""" - if self.Nparam == 2: - return ['variance','lengthscale'] + if self.num_params == 2: + return ['variance', 'lengthscale'] else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] + return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] - def K(self,X,X2,target): + def K(self, X, X2, target): """Compute the covariance matrix between X and X2.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) - np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target) + dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) + np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) 
* dist), target, target) - def Kdiag(self,X,target): + def Kdiag(self, X, target): """Compute the diagonal of the covariance matrix associated to X.""" - np.add(target,self.variance,target) + np.add(target, self.variance, target) - def dK_dtheta(self,dL_dK,X,X2,target): + def dK_dtheta(self, dL_dK, X, X2, target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) - dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist) - invdist = 1./np.where(dist!=0.,dist,np.inf) - dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 - #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[0] += np.sum(dvar*dL_dK) + dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) + dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist) + invdist = 1. / np.where(dist != 0., dist, np.inf) + dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3 + # dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] + target[0] += np.sum(dvar * dL_dK) if self.ARD == True: - dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - #dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] - target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) + dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis] + # dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] + target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0) else: - dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist - #dl = self.variance*dvar*dist2M.sum(-1)*invdist - target[1] += np.sum(dl*dL_dK) + dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) 
* dist)) * dist2M.sum(-1) * invdist + # dl = self.variance*dvar*dist2M.sum(-1)*invdist + target[1] += np.sum(dl * dL_dK) - def dKdiag_dtheta(self,dL_dKdiag,X,target): + def dKdiag_dtheta(self, dL_dKdiag, X, target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" target[0] += np.sum(dL_dKdiag) - def dK_dX(self,dL_dK,X,X2,target): + def dK_dX(self, dL_dK, X, X2, target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] - ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) - dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2)) - target += np.sum(dK_dX*dL_dK.T[:,:,None],0) + dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] + ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) + dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2)) + target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) - def dKdiag_dX(self,dL_dKdiag,X,target): + def dKdiag_dX(self, dL_dKdiag, X, target): pass - def Gram_matrix(self,F,F1,F2,lower,upper): + def Gram_matrix(self, F, F1, F2, lower, upper): """ - Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1. + Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. :param F: vector of functions :type F: np.array @@ -122,16 +120,16 @@ class Matern32(kernpart): :param lower,upper: boundaries of the input domain :type lower,upper: floats """ - assert self.D == 1 - def L(x,i): - return(3./self.lengthscale**2*F[i](x) + 2*np.sqrt(3)/self.lengthscale*F1[i](x) + F2[i](x)) + assert self.input_dim == 1 + def L(x, i): + return(3. 
/ self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x)) n = F.shape[0] - G = np.zeros((n,n)) + G = np.zeros((n, n)) for i in range(n): - for j in range(i,n): - G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] - Flower = np.array([f(lower) for f in F])[:,None] - F1lower = np.array([f(lower) for f in F1])[:,None] - #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" - #return(G) - return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) + for j in range(i, n): + G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0] + Flower = np.array([f(lower) for f in F])[:, None] + F1lower = np.array([f(lower) for f in F1])[:, None] + # print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" + # return(G) + return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T)) diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py index 9338db15..e02cb9bf 100644 --- a/GPy/kern/Matern52.py +++ b/GPy/kern/Matern52.py @@ -2,21 +2,21 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib from scipy import integrate -class Matern52(kernpart): +class Matern52(Kernpart): """ Matern 5/2 kernel: .. 
math:: - k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^D \\frac{(x_i-y_i)^2}{\ell_i^2} } + k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r) \ \ \ \ \ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} } - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the vector of lengthscale :math:`\ell_i` @@ -26,11 +26,11 @@ class Matern52(kernpart): :rtype: kernel object """ - def __init__(self,D,variance=1.,lengthscale=None,ARD=False): - self.D = D + def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False): + self.input_dim = input_dim self.ARD = ARD if ARD == False: - self.Nparam = 2 + self.num_params = 2 self.name = 'Mat52' if lengthscale is not None: lengthscale = np.asarray(lengthscale) @@ -38,13 +38,13 @@ class Matern52(kernpart): else: lengthscale = np.ones(1) else: - self.Nparam = self.D + 1 + self.num_params = self.input_dim + 1 self.name = 'Mat52' if lengthscale is not None: lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.D, "bad number of lengthscales" + assert lengthscale.size == self.input_dim, "bad number of lengthscales" else: - lengthscale = np.ones(self.D) + lengthscale = np.ones(self.input_dim) self._set_params(np.hstack((variance,lengthscale.flatten()))) def _get_params(self): @@ -53,13 +53,13 @@ class Matern52(kernpart): def _set_params(self,x): """set the value of the parameters.""" - assert x.size == self.Nparam + assert x.size == self.num_params self.variance = x[0] self.lengthscale = x[1:] def _get_param_names(self): """return parameter names.""" - if self.Nparam == 2: + if self.num_params == 2: return ['variance','lengthscale'] else: return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] @@ -109,7 +109,7 @@ class 
Matern52(kernpart): def Gram_matrix(self,F,F1,F2,F3,lower,upper): """ - Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to D=1. + Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. :param F: vector of functions :type F: np.array @@ -122,7 +122,7 @@ class Matern52(kernpart): :param lower,upper: boundaries of the input domain :type lower,upper: floats """ - assert self.D == 1 + assert self.input_dim == 1 def L(x,i): return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x)) n = F.shape[0] diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 81dce75f..97c1d88f 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, Fixed, rbfcos, IndependentOutputs try: from constructors import rbf_sympy, sympykern # these depend on sympy except: diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py index 09f0afa9..8ec3741d 100644 --- a/GPy/kern/bias.py +++ b/GPy/kern/bias.py @@ -2,20 +2,20 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib -class bias(kernpart): - def __init__(self,D,variance=1.): +class bias(Kernpart): + def __init__(self,input_dim,variance=1.): """ - 
:param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variance: the variance of the kernel :type variance: float """ - self.D = D - self.Nparam = 1 + self.input_dim = input_dim + self.num_params = 1 self.name = 'bias' self._set_params(np.array([variance]).flatten()) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index f2e4b57b..520c931b 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -12,7 +12,7 @@ from exponential import exponential as exponentialpart from Matern32 import Matern32 as Matern32part from Matern52 import Matern52 as Matern52part from bias import bias as biaspart -from fixed import fixed as fixedpart +from fixed import Fixed as fixedpart from finite_dimensional import finite_dimensional as finite_dimensionalpart from spline import spline as splinepart from Brownian import Brownian as Brownianpart @@ -21,10 +21,10 @@ from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part from prod import prod as prodpart from symmetric import symmetric as symmetric_part -from coregionalise import coregionalise as coregionalise_part +from coregionalise import Coregionalise as coregionalise_part from rational_quadratic import rational_quadratic as rational_quadraticpart from rbfcos import rbfcos as rbfcospart -from independent_outputs import independent_outputs as independent_output_part +from independent_outputs import IndependentOutputs as independent_output_part #TODO these s=constructors are not as clean as we'd like. Tidy the code up #using meta-classes to make the objects construct properly wthout them. 
@@ -33,8 +33,8 @@ def rbf(D,variance=1., lengthscale=None,ARD=False): """ Construct an RBF kernel - :param D: dimensionality of the kernel, obligatory - :type D: int + :param input_dim: dimensionality of the kernel, obligatory + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -51,7 +51,7 @@ def linear(D,variances=None,ARD=False): Arguments --------- - D (int), obligatory + input_dimD (int), obligatory variances (np.ndarray) ARD (boolean) """ @@ -64,7 +64,7 @@ def white(D,variance=1.): Arguments --------- - D (int), obligatory + input_dimD (int), obligatory variance (float) """ part = whitepart(D,variance) @@ -74,8 +74,8 @@ def exponential(D,variance=1., lengthscale=None, ARD=False): """ Construct an exponential kernel - :param D: dimensionality of the kernel, obligatory - :type D: int + :param input_dim: dimensionality of the kernel, obligatory + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -90,8 +90,8 @@ def Matern32(D,variance=1., lengthscale=None, ARD=False): """ Construct a Matern 3/2 kernel. - :param D: dimensionality of the kernel, obligatory - :type D: int + :param input_dim: dimensionality of the kernel, obligatory + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -106,8 +106,8 @@ def Matern52(D,variance=1., lengthscale=None, ARD=False): """ Construct a Matern 5/2 kernel. 
- :param D: dimensionality of the kernel, obligatory - :type D: int + :param input_dim: dimensionality of the kernel, obligatory + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -124,7 +124,7 @@ def bias(D,variance=1.): Arguments --------- - D (int), obligatory + input_dim (int), obligatory variance (float) """ part = biaspart(D,variance) @@ -133,7 +133,7 @@ def bias(D,variance=1.): def finite_dimensional(D,F,G,variances=1.,weights=None): """ Construct a finite dimensional kernel. - D: int - the number of input dimensions + input_dim: int - the number of input dimensions F: np.array of functions with shape (n,) - the n basis functions G: np.array with shape (n,n) - the Gram matrix associated to F variances : np.ndarray with shape (n,) @@ -145,8 +145,8 @@ def spline(D,variance=1.): """ Construct a spline kernel. - :param D: Dimensionality of the kernel - :type D: int + :param input_dim: Dimensionality of the kernel + :type input_dim: int :param variance: the variance of the kernel :type variance: float """ @@ -157,8 +157,8 @@ def Brownian(D,variance=1.): """ Construct a Brownian motion kernel. - :param D: Dimensionality of the kernel - :type D: int + :param input_dim: Dimensionality of the kernel + :type input_dim: int :param variance: the variance of the kernel :type variance: float """ @@ -204,8 +204,8 @@ def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_fre """ Construct an periodic exponential kernel - :param D: dimensionality, only defined for D=1 - :type D: int + :param input_dim: dimensionality, only defined for input_dim=1 + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -222,8 +222,8 @@ def periodic_Matern32(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, """ Construct a periodic Matern 3/2 kernel. 
- :param D: dimensionality, only defined for D=1 - :type D: int + :param input_dim: dimensionality, only defined for input_dim=1 + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -240,8 +240,8 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, """ Construct a periodic Matern 5/2 kernel. - :param D: dimensionality, only defined for D=1 - :type D: int + :param input_dim: dimensionality, only defined for input_dim=1 + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the lengthscale of the kernel @@ -256,14 +256,14 @@ def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10, def prod(k1,k2,tensor=False): """ - Construct a product kernel over D from two kernels over D + Construct a product kernel over input_dim from two kernels over input_dim :param k1, k2: the kernels to multiply :type k1, k2: kernpart :rtype: kernel object """ part = prodpart(k1,k2,tensor) - return kern(part.D, [part]) + return kern(part.input_dim, [part]) def symmetric(k): """ @@ -273,7 +273,7 @@ def symmetric(k): k_.parts = [symmetric_part(p) for p in k.parts] return k_ -def coregionalise(Nout,R=1, W=None, kappa=None): +def Coregionalise(Nout,R=1, W=None, kappa=None): p = coregionalise_part(Nout,R,W,kappa) return kern(1,[p]) @@ -282,8 +282,8 @@ def rational_quadratic(D,variance=1., lengthscale=1., power=1.): """ Construct rational quadratic kernel. 
- :param D: the number of input dimensions - :type D: int (D=1 is the only value currently supported) + :param input_dim: the number of input dimensions + :type input_dim: int (input_dim=1 is the only value currently supported) :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the lengthscale :math:`\ell` @@ -294,13 +294,13 @@ def rational_quadratic(D,variance=1., lengthscale=1., power=1.): part = rational_quadraticpart(D,variance, lengthscale, power) return kern(D, [part]) -def fixed(D, K, variance=1.): +def Fixed(D, K, variance=1.): """ - Construct a fixed effect kernel. + Construct a Fixed effect kernel. Arguments --------- - D (int), obligatory + input_dim (int), obligatory K (np.array), obligatory variance (float) """ @@ -314,13 +314,13 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False): part = rbfcospart(D,variance,frequencies,bandwidths,ARD) return kern(D,[part]) -def independent_outputs(k): +def IndependentOutputs(k): """ Construct a kernel with independent outputs from an existing kernel """ for sl in k.input_slices: assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! 
(TODO)" parts = [independent_output_part(p) for p in k.parts] - return kern(k.D+1,parts) + return kern(k.input_dim+1,parts) diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py index a4d22c2d..8faceafe 100644 --- a/GPy/kern/coregionalise.py +++ b/GPy/kern/coregionalise.py @@ -1,18 +1,18 @@ # Copyright (c) 2012, James Hensman and Ricardo Andrade # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np from GPy.util.linalg import mdot, pdinv import pdb from scipy import weave -class coregionalise(kernpart): +class Coregionalise(Kernpart): """ Kernel for Intrinsic Corregionalization Models """ def __init__(self,Nout,R=1, W=None, kappa=None): - self.D = 1 + self.input_dim = 1 self.name = 'coregion' self.Nout = Nout self.R = R @@ -26,14 +26,14 @@ class coregionalise(kernpart): else: assert kappa.shape==(self.Nout,) self.kappa = kappa - self.Nparam = self.Nout*(self.R + 1) + self.num_params = self.Nout*(self.R + 1) self._set_params(np.hstack([self.W.flatten(),self.kappa])) def _get_params(self): return np.hstack([self.W.flatten(),self.kappa]) def _set_params(self,x): - assert x.size == self.Nparam + assert x.size == self.num_params self.kappa = x[-self.Nout:] self.W = x[:-self.Nout].reshape(self.Nout,self.R) self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa) @@ -69,14 +69,14 @@ class coregionalise(kernpart): else: index2 = np.asarray(index2,dtype=np.int) code=""" - for(int i=0;i 1: self.tie_params(index) @@ -211,7 +210,7 @@ class kern(parameterised): def K(self, X, X2=None, which_parts='all'): if which_parts == 'all': which_parts = [True] * self.Nparts - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim if X2 is None: target = np.zeros((X.shape[0], X.shape[0])) [p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used] @@ -223,14 +222,14 @@ class kern(parameterised): def dK_dtheta(self, 
dL_dK, X, X2=None): """ :param dL_dK: An array of dL_dK derivaties, dL_dK - :type dL_dK: Np.ndarray (N x M) + :type dL_dK: Np.ndarray (N x num_inducing) :param X: Observed data inputs - :type X: np.ndarray (N x D) + :type X: np.ndarray (N x input_dim) :param X2: Observed dara inputs (optional, defaults to X) - :type X2: np.ndarray (M x D) + :type X2: np.ndarray (num_inducing x input_dim) """ - assert X.shape[1] == self.D - target = np.zeros(self.Nparam) + assert X.shape[1] == self.input_dim + target = np.zeros(self.num_params) if X2 is None: [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] else: @@ -251,20 +250,20 @@ class kern(parameterised): def Kdiag(self, X, which_parts='all'): if which_parts == 'all': which_parts = [True] * self.Nparts - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim target = np.zeros(X.shape[0]) [p.Kdiag(X[:, i_s], target=target) for p, i_s, part_on in zip(self.parts, self.input_slices, which_parts) if part_on] return target def dKdiag_dtheta(self, dL_dKdiag, X): - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim assert dL_dKdiag.size == X.shape[0] - target = np.zeros(self.Nparam) + target = np.zeros(self.num_params) [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] return self._transform_gradients(target) def dKdiag_dX(self, dL_dKdiag, X): - assert X.shape[1] == self.D + assert X.shape[1] == self.input_dim target = np.zeros_like(X) [p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] return target @@ -275,7 +274,7 @@ class kern(parameterised): return target def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): - target = np.zeros(self.Nparam) + target = np.zeros(self.num_params) [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] 
return self._transform_gradients(target) @@ -290,7 +289,7 @@ class kern(parameterised): return target def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): - target = np.zeros((self.Nparam)) + target = np.zeros((self.num_params)) [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] return self._transform_gradients(target) @@ -300,16 +299,16 @@ class kern(parameterised): return target def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are N,M,input_dim""" + """return shapes are N,num_inducing,input_dim""" target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] return target_mu, target_S def psi2(self, Z, mu, S): """ - :param Z: np.ndarray of inducing inputs (M x input_dim) + :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) :param mu, S: np.ndarrays of means and variances (each N x input_dim) - :returns psi2: np.ndarray (N,M,M) + :returns psi2: np.ndarray (N,num_inducing,num_inducing) """ target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] @@ -327,13 +326,13 @@ class kern(parameterised): p2.psi1(Z, mu, S, tmp2) prod = np.multiply(tmp1, tmp2) - crossterms += prod[:,:,None] + prod[:, None, :] - + crossterms += prod[:, :, None] + prod[:, None, :] + target += crossterms return target def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - target = np.zeros(self.Nparam) + target = np.zeros(self.num_params) [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] # compute the "cross" terms @@ -345,14 +344,14 @@ class kern(parameterised): tmp = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, tmp) - 
p2.dpsi1_dtheta((tmp[:,None,:]*dL_dpsi2).sum(1)*2., Z, mu, S, target[ps2]) + p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) return self._transform_gradients(target) def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): target = np.zeros_like(Z) [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] - #target *= 2 + # target *= 2 # compute the "cross" terms # TODO: we need input_slices here. @@ -362,7 +361,7 @@ class kern(parameterised): tmp = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, tmp) tmp2 = np.zeros_like(target) - p2.dpsi1_dZ((tmp[:,None,:]*dL_dpsi2).sum(1).T, Z, mu, S, tmp2) + p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1).T, Z, mu, S, tmp2) target += tmp2 return target * 2 @@ -379,14 +378,14 @@ class kern(parameterised): tmp = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:,None,:]*dL_dpsi2).sum(1).T*2., Z, mu, S, target_mu, target_S) + p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1).T * 2., Z, mu, S, target_mu, target_S) return target_mu, target_S def plot(self, x=None, plot_limits=None, which_parts='all', resolution=None, *args, **kwargs): if which_parts == 'all': which_parts = [True] * self.Nparts - if self.D == 1: + if self.input_dim == 1: if x is None: x = np.zeros((1, 1)) else: @@ -408,7 +407,7 @@ class kern(parameterised): pb.xlabel("x") pb.ylabel("k(x,%0.1f)" % x) - elif self.D == 2: + elif self.input_dim == 2: if x is None: x = np.zeros((1, 2)) else: @@ -430,7 +429,7 @@ class kern(parameterised): Xnew = np.vstack((xx.flatten(), yy.flatten())).T Kx = self.K(Xnew, x, which_parts) Kx = Kx.reshape(resolution, resolution).T - pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) + pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) # @UndefinedVariable pb.xlim(xmin[0], xmax[0]) pb.ylim(xmin[1], xmax[1]) pb.xlabel("x1") diff --git a/GPy/kern/kernpart.py 
b/GPy/kern/kernpart.py index 7de150e9..2ed56d66 100644 --- a/GPy/kern/kernpart.py +++ b/GPy/kern/kernpart.py @@ -2,18 +2,18 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -class kernpart(object): - def __init__(self,D): +class Kernpart(object): + def __init__(self,input_dim): """ The base class for a kernpart: a positive definite function which forms part of a kernel - :param D: the number of input dimensions to the function - :type D: int + :param input_dim: the number of input dimensions to the function + :type input_dim: int Do not instantiate. """ - self.D = D - self.Nparam = 1 + self.input_dim = input_dim + self.num_params = 1 self.name = 'unnamed' def _get_params(self): diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 8744eda0..b2aa29f9 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -2,21 +2,21 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np from ..util.linalg import tdot from scipy import weave -class linear(kernpart): +class linear(Kernpart): """ Linear kernel .. math:: - k(x,y) = \sum_{i=1}^D \sigma^2_i x_iy_i + k(x,y) = \sum_{i=1}^input_dim \sigma^2_i x_iy_i - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variances: the vector of variances :math:`\sigma^2_i` :type variances: array or list of the appropriate size (or float if there is only one variance parameter) :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. 
@@ -24,11 +24,11 @@ class linear(kernpart): :rtype: kernel object """ - def __init__(self, D, variances=None, ARD=False): - self.D = D + def __init__(self, input_dim, variances=None, ARD=False): + self.input_dim = input_dim self.ARD = ARD if ARD == False: - self.Nparam = 1 + self.num_params = 1 self.name = 'linear' if variances is not None: variances = np.asarray(variances) @@ -37,13 +37,13 @@ class linear(kernpart): variances = np.ones(1) self._Xcache, self._X2cache = np.empty(shape=(2,)) else: - self.Nparam = self.D + self.num_params = self.input_dim self.name = 'linear' if variances is not None: variances = np.asarray(variances) - assert variances.size == self.D, "bad number of lengthscales" + assert variances.size == self.input_dim, "bad number of lengthscales" else: - variances = np.ones(self.D) + variances = np.ones(self.input_dim) self._set_params(variances.flatten()) # initialize cache @@ -54,12 +54,12 @@ class linear(kernpart): return self.variances def _set_params(self, x): - assert x.size == (self.Nparam) + assert x.size == (self.num_params) self.variances = x self.variances2 = np.square(self.variances) def _get_param_names(self): - if self.Nparam == 1: + if self.num_params == 1: return ['variance'] else: return ['variance_%i' % i for i in range(self.variances.size)] @@ -82,7 +82,7 @@ class linear(kernpart): def dK_dtheta(self, dL_dK, X, X2, target): if self.ARD: if X2 is None: - [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)] + [np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.input_dim)] else: product = X[:, None, :] * X2[None, :, :] target += (dL_dK[:, :, None] * product).sum(0).sum(0) @@ -138,7 +138,7 @@ class linear(kernpart): def psi2(self, Z, mu, S, target): """ - returns N,M,M matrix + returns N,num_inducing,num_inducing matrix """ self._psi_computations(Z, mu, S) # psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :] 
@@ -153,7 +153,7 @@ class linear(kernpart): # psi2_real[n, m, m_prime] = np.dot(tmp, ( # self._Z[m_prime:m_prime + 1] * self.variances).T) # mu2_S = (self._mu[:, None, :] * self._mu[:, :, None]) -# mu2_S[:, np.arange(self.D), np.arange(self.D)] += self._S +# mu2_S[:, np.arange(self.input_dim), np.arange(self.input_dim)] += self._S # psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1) # psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1) # psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T @@ -168,7 +168,7 @@ class linear(kernpart): target += tmp.sum() def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,M,M,input_dim """ + """Think N,num_inducing,num_inducing,input_dim """ self._psi_computations(Z, mu, S) AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] AZZA = AZZA + AZZA.swapaxes(1, 2) @@ -184,7 +184,7 @@ class linear(kernpart): double factor,tmp; #pragma omp parallel for private(m,mm,q,qq,factor,tmp) for(n=0;n' + k2.name self.k1 = k1 self.k2 = k2 if tensor: - self.D = k1.D + k2.D - self.slice1 = slice(0,self.k1.D) - self.slice2 = slice(self.k1.D,self.k1.D+self.k2.D) + self.input_dim = k1.input_dim + k2.input_dim + self.slice1 = slice(0,self.k1.input_dim) + self.slice2 = slice(self.k1.input_dim,self.k1.input_dim+self.k2.input_dim) else: - assert k1.D == k2.D, "Error: The input spaces of the kernels to sum don't have the same dimension." - self.D = k1.D - self.slice1 = slice(0,self.D) - self.slice2 = slice(0,self.D) + assert k1.input_dim == k2.input_dim, "Error: The input spaces of the kernels to sum don't have the same dimension." 
+ self.input_dim = k1.input_dim + self.slice1 = slice(0,self.input_dim) + self.slice2 = slice(0,self.input_dim) self._X, self._X2, self._params = np.empty(shape=(3,1)) self._set_params(np.hstack((k1._get_params(),k2._get_params()))) @@ -40,8 +40,8 @@ class prod(kernpart): def _set_params(self,x): """set the value of the parameters.""" - self.k1._set_params(x[:self.k1.Nparam]) - self.k2._set_params(x[self.k1.Nparam:]) + self.k1._set_params(x[:self.k1.num_params]) + self.k2._set_params(x[self.k1.num_params:]) def _get_param_names(self): """return parameter names.""" @@ -55,11 +55,11 @@ class prod(kernpart): """derivative of the covariance matrix with respect to the parameters.""" self._K_computations(X,X2) if X2 is None: - self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam]) - self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:]) else: - self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam]) - self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:]) def Kdiag(self,X,target): """Compute the diagonal of the covariance matrix associated to X.""" @@ -74,8 +74,8 @@ class prod(kernpart): K2 = np.zeros(X.shape[0]) self.k1.Kdiag(X[:,self.slice1],K1) self.k2.Kdiag(X[:,self.slice2],K2) - self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam]) - self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:]) + self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) + 
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py index cc15a94e..237c9557 100644 --- a/GPy/kern/prod_orthogonal.py +++ b/GPy/kern/prod_orthogonal.py @@ -1,23 +1,23 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib #from scipy import integrate # This may not be necessary (Nicolas, 20th Feb) -class prod_orthogonal(kernpart): +class prod_orthogonal(Kernpart): """ Computes the product of 2 kernels :param k1, k2: the kernels to multiply - :type k1, k2: kernpart + :type k1, k2: Kernpart :rtype: kernel object """ def __init__(self,k1,k2): - self.D = k1.D + k2.D - self.Nparam = k1.Nparam + k2.Nparam + self.input_dim = k1.input_dim + k2.input_dim + self.num_params = k1.num_params + k2.num_params self.name = k1.name + '' + k2.name self.k1 = k1 self.k2 = k2 @@ -30,8 +30,8 @@ class prod_orthogonal(kernpart): def _set_params(self,x): """set the value of the parameters.""" - self.k1._set_params(x[:self.k1.Nparam]) - self.k2._set_params(x[self.k1.Nparam:]) + self.k1._set_params(x[:self.k1.num_params]) + self.k2._set_params(x[self.k1.num_params:]) def _get_param_names(self): """return parameter names.""" @@ -45,42 +45,42 @@ class prod_orthogonal(kernpart): """derivative of the covariance matrix with respect to the parameters.""" self._K_computations(X,X2) if X2 is None: - self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], None, target[:self.k1.Nparam]) - self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], None, target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:]) 
else: - self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam]) - self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:]) def Kdiag(self,X,target): """Compute the diagonal of the covariance matrix associated to X.""" target1 = np.zeros(X.shape[0]) target2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,:self.k1.D],target1) - self.k2.Kdiag(X[:,self.k1.D:],target2) + self.k1.Kdiag(X[:,:self.k1.input_dim],target1) + self.k2.Kdiag(X[:,self.k1.input_dim:],target2) target += target1 * target2 def dKdiag_dtheta(self,dL_dKdiag,X,target): K1 = np.zeros(X.shape[0]) K2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,:self.k1.D],K1) - self.k2.Kdiag(X[:,self.k1.D:],K2) - self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam]) - self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:]) + self.k1.Kdiag(X[:,:self.k1.input_dim],K1) + self.k2.Kdiag(X[:,self.k1.input_dim:],K2) + self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params]) + self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:]) def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" self._K_computations(X,X2) - self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target) - self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target) + self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target) + self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target) def dKdiag_dX(self, dL_dKdiag, X, target): K1 = np.zeros(X.shape[0]) K2 = np.zeros(X.shape[0]) - self.k1.Kdiag(X[:,0:self.k1.D],K1) - 
self.k2.Kdiag(X[:,self.k1.D:],K2) + self.k1.Kdiag(X[:,0:self.k1.input_dim],K1) + self.k2.Kdiag(X[:,self.k1.input_dim:],K2) - self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target) - self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target) + self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.input_dim], target) + self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.input_dim:], target) def _K_computations(self,X,X2): if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())): @@ -90,12 +90,12 @@ class prod_orthogonal(kernpart): self._X2 = None self._K1 = np.zeros((X.shape[0],X.shape[0])) self._K2 = np.zeros((X.shape[0],X.shape[0])) - self.k1.K(X[:,:self.k1.D],None,self._K1) - self.k2.K(X[:,self.k1.D:],None,self._K2) + self.k1.K(X[:,:self.k1.input_dim],None,self._K1) + self.k2.K(X[:,self.k1.input_dim:],None,self._K2) else: self._X2 = X2.copy() self._K1 = np.zeros((X.shape[0],X2.shape[0])) self._K2 = np.zeros((X.shape[0],X2.shape[0])) - self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1) - self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2) + self.k1.K(X[:,:self.k1.input_dim],X2[:,:self.k1.input_dim],self._K1) + self.k2.K(X[:,self.k1.input_dim:],X2[:,self.k1.input_dim:],self._K2) diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/rational_quadratic.py index 561ea065..d1e7a7e3 100644 --- a/GPy/kern/rational_quadratic.py +++ b/GPy/kern/rational_quadratic.py @@ -2,10 +2,10 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np -class rational_quadratic(kernpart): +class rational_quadratic(Kernpart): """ rational quadratic kernel @@ -13,21 +13,21 @@ class rational_quadratic(kernpart): k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2 \ell^2} \\bigg)^{- \\alpha} \ \ \ \ \ \\text{ where } r^2 = (x-y)^2 - :param D: the number of input dimensions - :type D: int (D=1 is the only value currently supported) + :param input_dim: the number of input dimensions + 
:type input_dim: int (input_dim=1 is the only value currently supported) :param variance: the variance :math:`\sigma^2` :type variance: float :param lengthscale: the lengthscale :math:`\ell` :type lengthscale: float :param power: the power :math:`\\alpha` :type power: float - :rtype: kernpart object + :rtype: Kernpart object """ - def __init__(self,D,variance=1.,lengthscale=1.,power=1.): - assert D == 1, "For this kernel we assume D=1" - self.D = D - self.Nparam = 3 + def __init__(self,input_dim,variance=1.,lengthscale=1.,power=1.): + assert input_dim == 1, "For this kernel we assume input_dim=1" + self.input_dim = input_dim + self.num_params = 3 self.name = 'rat_quad' self.variance = variance self.lengthscale = lengthscale diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index a89b7d45..2d316234 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -2,13 +2,13 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib from scipy import weave from ..util.linalg import tdot -class rbf(kernpart): +class rbf(Kernpart): """ Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: @@ -18,8 +18,8 @@ class rbf(kernpart): where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input. - :param D: the number of input dimensions - :type D: int + :param input_dim: the number of input dimensions + :type input_dim: int :param variance: the variance of the kernel :type variance: float :param lengthscale: the vector of lengthscale of the kernel @@ -31,76 +31,76 @@ class rbf(kernpart): .. 
Note: this object implements both the ARD and 'spherical' version of the function """ - def __init__(self,D,variance=1.,lengthscale=None,ARD=False): - self.D = D + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False): + self.input_dim = input_dim self.name = 'rbf' self.ARD = ARD if not ARD: - self.Nparam = 2 + self.num_params = 2 if lengthscale is not None: lengthscale = np.asarray(lengthscale) assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" else: lengthscale = np.ones(1) else: - self.Nparam = self.D + 1 + self.num_params = self.input_dim + 1 if lengthscale is not None: lengthscale = np.asarray(lengthscale) - assert lengthscale.size == self.D, "bad number of lengthscales" + assert lengthscale.size == self.input_dim, "bad number of lengthscales" else: - lengthscale = np.ones(self.D) + lengthscale = np.ones(self.input_dim) - self._set_params(np.hstack((variance,lengthscale.flatten()))) + self._set_params(np.hstack((variance, lengthscale.flatten()))) - #initialize cache - self._Z, self._mu, self._S = np.empty(shape=(3,1)) - self._X, self._X2, self._params = np.empty(shape=(3,1)) + # initialize cache + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) + self._X, self._X2, self._params = np.empty(shape=(3, 1)) - #a set of optional args to pass to weave + # a set of optional args to pass to weave self.weave_options = {'headers' : [''], - 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], + 'extra_compile_args': ['-fopenmp -O3'], # -march=native'], 'extra_link_args' : ['-lgomp']} def _get_params(self): - return np.hstack((self.variance,self.lengthscale)) + return np.hstack((self.variance, self.lengthscale)) - def _set_params(self,x): - assert x.size==(self.Nparam) + def _set_params(self, x): + assert x.size == (self.num_params) self.variance = x[0] self.lengthscale = x[1:] self.lengthscale2 = np.square(self.lengthscale) - #reset cached results - self._X, self._X2, self._params = np.empty(shape=(3,1)) - self._Z, 
self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S + # reset cached results + self._X, self._X2, self._params = np.empty(shape=(3, 1)) + self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S def _get_param_names(self): - if self.Nparam == 2: - return ['variance','lengthscale'] + if self.num_params == 2: + return ['variance', 'lengthscale'] else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] + return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] - def K(self,X,X2,target): - self._K_computations(X,X2) - target += self.variance*self._K_dvar + def K(self, X, X2, target): + self._K_computations(X, X2) + target += self.variance * self._K_dvar - def Kdiag(self,X,target): - np.add(target,self.variance,target) + def Kdiag(self, X, target): + np.add(target, self.variance, target) - def dK_dtheta(self,dL_dK,X,X2,target): - self._K_computations(X,X2) - target[0] += np.sum(self._K_dvar*dL_dK) + def dK_dtheta(self, dL_dK, X, X2, target): + self._K_computations(X, X2) + target[0] += np.sum(self._K_dvar * dL_dK) if self.ARD: - dvardLdK = self._K_dvar*dL_dK - var_len3 = self.variance/np.power(self.lengthscale,3) + dvardLdK = self._K_dvar * dL_dK + var_len3 = self.variance / np.power(self.lengthscale, 3) if X2 is None: - #save computation for the symmetrical case + # save computation for the symmetrical case dvardLdK += dvardLdK.T code = """ int q,i,j; double tmp; - for(q=0; q """ weave.inline(code, support_code=support_code, libraries=['gomp'], - arg_names=['N','M','input_dim','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'], - type_converters=weave.converters.blitz,**self.weave_options) + arg_names=['N','num_inducing','input_dim','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'], + 
type_converters=weave.converters.blitz, **self.weave_options) - return mudist,mudist_sq, psi2_exponent, psi2 + return mudist, mudist_sq, psi2_exponent, psi2 diff --git a/GPy/kern/rbfcos.py b/GPy/kern/rbfcos.py index 094b806b..b1e99d3c 100644 --- a/GPy/kern/rbfcos.py +++ b/GPy/kern/rbfcos.py @@ -3,32 +3,32 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np -class rbfcos(kernpart): - def __init__(self,D,variance=1.,frequencies=None,bandwidths=None,ARD=False): - self.D = D +class rbfcos(Kernpart): + def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False): + self.input_dim = input_dim self.name = 'rbfcos' - if self.D>10: + if self.input_dim>10: print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs" self.ARD = ARD - #set the default frequencies and bandwidths, appropriate Nparam + #set the default frequencies and bandwidths, appropriate num_params if ARD: - self.Nparam = 2*self.D + 1 + self.num_params = 2*self.input_dim + 1 if frequencies is not None: frequencies = np.asarray(frequencies) - assert frequencies.size == self.D, "bad number of frequencies" + assert frequencies.size == self.input_dim, "bad number of frequencies" else: - frequencies = np.ones(self.D) + frequencies = np.ones(self.input_dim) if bandwidths is not None: bandwidths = np.asarray(bandwidths) - assert bandwidths.size == self.D, "bad number of bandwidths" + assert bandwidths.size == self.input_dim, "bad number of bandwidths" else: - bandwidths = np.ones(self.D) + bandwidths = np.ones(self.input_dim) else: - self.Nparam = 3 + self.num_params = 3 if frequencies is not None: frequencies = np.asarray(frequencies) assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel" @@ -51,19 +51,19 @@ class rbfcos(kernpart): return np.hstack((self.variance,self.frequencies, self.bandwidths)) def _set_params(self,x): - assert 
x.size==(self.Nparam) + assert x.size==(self.num_params) if self.ARD: self.variance = x[0] - self.frequencies = x[1:1+self.D] - self.bandwidths = x[1+self.D:] + self.frequencies = x[1:1+self.input_dim] + self.bandwidths = x[1+self.input_dim:] else: self.variance, self.frequencies, self.bandwidths = x def _get_param_names(self): - if self.Nparam == 3: + if self.num_params == 3: return ['variance','frequency','bandwidth'] else: - return ['variance']+['frequency_%i'%i for i in range(self.D)]+['bandwidth_%i'%i for i in range(self.D)] + return ['variance']+['frequency_%i'%i for i in range(self.input_dim)]+['bandwidth_%i'%i for i in range(self.input_dim)] def K(self,X,X2,target): self._K_computations(X,X2) @@ -76,9 +76,9 @@ class rbfcos(kernpart): self._K_computations(X,X2) target[0] += np.sum(dL_dK*self._dvar) if self.ARD: - for q in xrange(self.D): + for q in xrange(self.input_dim): target[q+1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.tan(2.*np.pi*self._dist[:,:,q]*self.frequencies[q])*self._dist[:,:,q]) - target[q+1+self.D] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2[:,:,q]) + target[q+1+self.input_dim] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2[:,:,q]) else: target[1] += -2.*np.pi*self.variance*np.sum(dL_dK*self._dvar*np.sum(np.tan(2.*np.pi*self._dist*self.frequencies)*self._dist,-1)) target[2] += -2.*np.pi**2*self.variance*np.sum(dL_dK*self._dvar*self._dist2.sum(-1)) @@ -100,13 +100,13 @@ class rbfcos(kernpart): self._X = X.copy() self._X2 = X2.copy() - #do the distances: this will be high memory for large D + #do the distances: this will be high memory for large input_dim #NB: we don't take the abs of the dist because cos is symmetric self._dist = X[:,None,:] - X2[None,:,:] self._dist2 = np.square(self._dist) #ensure the next section is computed: - self._params = np.empty(self.Nparam) + self._params = np.empty(self.num_params) if not np.all(self._params == self._get_params()): self._params == 
self._get_params().copy() diff --git a/GPy/kern/spline.py b/GPy/kern/spline.py index 030b2f02..f2802180 100644 --- a/GPy/kern/spline.py +++ b/GPy/kern/spline.py @@ -2,28 +2,28 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib def theta(x): """Heaviside step function""" return np.where(x>=0.,1.,0.) -class spline(kernpart): +class spline(Kernpart): """ Spline kernel - :param D: the number of input dimensions (fixed to 1 right now TODO) - :type D: int + :param input_dim: the number of input dimensions (fixed to 1 right now TODO) + :type input_dim: int :param variance: the variance of the kernel :type variance: float """ - def __init__(self,D,variance=1.,lengthscale=1.): - self.D = D - assert self.D==1 - self.Nparam = 1 + def __init__(self,input_dim,variance=1.,lengthscale=1.): + self.input_dim = input_dim + assert self.input_dim==1 + self.num_params = 1 self.name = 'spline' self._set_params(np.squeeze(variance)) diff --git a/GPy/kern/symmetric.py b/GPy/kern/symmetric.py index c3b046c7..c7099a6f 100644 --- a/GPy/kern/symmetric.py +++ b/GPy/kern/symmetric.py @@ -1,27 +1,27 @@ # Copyright (c) 2012 James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np -class symmetric(kernpart): +class symmetric(Kernpart): """ Symmetrical kernels :param k: the kernel to symmetrify - :type k: kernpart + :type k: Kernpart :param transform: the transform to use in symmetrification (allows symmetry on specified axes) - :type transform: A numpy array (D x D) specifiying the transform - :rtype: kernpart + :type transform: A numpy array (input_dim x input_dim) specifiying the transform + :rtype: Kernpart """ def __init__(self,k,transform=None): if transform is None: - transform = np.eye(k.D)*-1. - assert transform.shape == (k.D, k.D) + transform = np.eye(k.input_dim)*-1. 
+ assert transform.shape == (k.input_dim, k.input_dim) self.transform = transform - self.D = k.D - self.Nparam = k.Nparam + self.input_dim = k.input_dim + self.num_params = k.num_params self.name = k.name + '_symm' self.k = k self._set_params(k._get_params()) diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py index db3cc976..def1bc5f 100644 --- a/GPy/kern/sympykern.py +++ b/GPy/kern/sympykern.py @@ -9,9 +9,9 @@ import sys current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) import tempfile import pdb -from kernpart import kernpart +from kernpart import Kernpart -class spkern(kernpart): +class spkern(Kernpart): """ A kernel object, where all the hard work in done by sympy. @@ -26,7 +26,7 @@ class spkern(kernpart): - to handle multiple inputs, call them x1, z1, etc - to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO """ - def __init__(self,D,k,param=None): + def __init__(self,input_dim,k,param=None): self.name='sympykern' self._sp_k = k sp_vars = [e for e in k.atoms() if e.is_Symbol] @@ -35,15 +35,15 @@ class spkern(kernpart): assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)]) assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)]) assert len(self._sp_x)==len(self._sp_z) - self.D = len(self._sp_x) - assert self.D == D + self.input_dim = len(self._sp_x) + assert self.input_dim == input_dim self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name) - self.Nparam = len(self._sp_theta) + self.num_params = len(self._sp_theta) #deal with param if param is None: - param = np.ones(self.Nparam) - assert param.size==self.Nparam + param = np.ones(self.num_params) + assert param.size==self.num_params self._set_params(param) #Differentiate! 
@@ -69,15 +69,15 @@ class spkern(kernpart): def compute_psi_stats(self): #define some normal distributions - mus = [sp.var('mu%i'%i,real=True) for i in range(self.D)] - Ss = [sp.var('S%i'%i,positive=True) for i in range(self.D)] + mus = [sp.var('mu%i'%i,real=True) for i in range(self.input_dim)] + Ss = [sp.var('S%i'%i,positive=True) for i in range(self.input_dim)] normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)] #do some integration! #self._sp_psi0 = ?? self._sp_psi1 = self._sp_k - for i in range(self.D): - print 'perfoming integrals %i of %i'%(i+1,2*self.D) + for i in range(self.input_dim): + print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim) sys.stdout.flush() self._sp_psi1 *= normals[i] self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo)) @@ -85,10 +85,10 @@ class spkern(kernpart): self._sp_psi1 = self._sp_psi1.simplify() #and here's psi2 (eek!) - zprime = [sp.Symbol('zp%i'%i) for i in range(self.D)] + zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)] self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime)) - for i in range(self.D): - print 'perfoming integrals %i of %i'%(self.D+i+1,2*self.D) + for i in range(self.input_dim): + print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim) sys.stdout.flush() self._sp_psi2 *= normals[i] self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo)) @@ -113,21 +113,21 @@ class spkern(kernpart): self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code) #Here's some code to do the looping for K - arglist = ", ".join(["X[i*D+%s]"%x.name[1:] for x in self._sp_x]\ - + ["Z[j*D+%s]"%z.name[1:] for z in self._sp_z]\ - + ["param[%i]"%i for i in range(self.Nparam)]) + arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\ + + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\ + + ["param[%i]"%i for i in range(self.num_params)]) self._K_code 
=\ """ int i; int j; int N = target_array->dimensions[0]; - int M = target_array->dimensions[1]; - int D = X_array->dimensions[1]; + int num_inducing = target_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) for (i=0;idimensions[0]; - int D = X_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; //#pragma omp parallel for for (i=0;idimensions[0]; - int M = partial_array->dimensions[1]; - int D = X_array->dimensions[1]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) for (i=0;idimensions[0]; - int D = X_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; for (i=0;idimensions[0]; - int M = partial_array->dimensions[1]; - int D = X_array->dimensions[1]; + int num_inducing = partial_array->dimensions[1]; + int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) for (i=0;idimensions[0]; - int M = 0; - int D = X_array->dimensions[1]; + int num_inducing = 0; + int input_dim = X_array->dimensions[1]; for (i=0;i self.N: self.YYT = np.dot(self.Y, self.Y.T) @@ -52,9 +52,9 @@ class Gaussian(likelihood): def _set_params(self, x): x = np.float64(x) - if self._variance != x: + if np.all(self._variance != x): if x == 0.: - self.precision = None + self.precision = np.inf self.V = None else: self.precision = 1. / x @@ -68,9 +68,9 @@ class Gaussian(likelihood): """ mean = mu * self._scale + self._offset if full_cov: - if self.D > 1: + if self.output_dim > 1: raise NotImplementedError, "TODO" - # Note. for D>1, we need to re-normalise all the outputs independently. + # Note. for output_dim>1, we need to re-normalise all the outputs independently. # This will mess up computations of diag(true_var), below. # note that the upper, lower quantiles should be the same shape as mean # Augment the output variance with the likelihood variance and rescale. 
diff --git a/GPy/likelihoods/likelihood_functions.py b/GPy/likelihoods/likelihood_functions.py index 00100d17..c801b9a9 100644 --- a/GPy/likelihoods/likelihood_functions.py +++ b/GPy/likelihoods/likelihood_functions.py @@ -10,12 +10,12 @@ from ..util.plot import gpplot from ..util.univariate_Gaussian import std_norm_pdf,std_norm_cdf import link_functions -class likelihood_function(object): +class LikelihoodFunction(object): """ Likelihood class for doing Expectation propagation :param Y: observed output (Nx1 numpy.darray) - ..Note:: Y values allowed depend on the likelihood_function used + ..Note:: Y values allowed depend on the LikelihoodFunction used """ def __init__(self,link): if link == self._analytical: @@ -69,7 +69,7 @@ class likelihood_function(object): sigma2_hat = m2 - mu_hat**2 # Second central moment return float(Z_hat), float(mu_hat), float(sigma2_hat) -class binomial(likelihood_function): +class Binomial(LikelihoodFunction): """ Probit likelihood Y is expected to take values in {-1,1} @@ -82,7 +82,7 @@ class binomial(likelihood_function): self._analytical = link_functions.probit if not link: link = self._analytical - super(binomial, self).__init__(link) + super(Binomial, self).__init__(link) def _distribution(self,gp,obs): pass @@ -134,7 +134,7 @@ class binomial(likelihood_function): p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var)) return mean[:,None], np.nan*var, p_025[:,None], p_975[:,None] # TODO: var -class Poisson(likelihood_function): +class Poisson(LikelihoodFunction): """ Poisson likelihood Y is expected to take values in {0,1,2,...} diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py index d762f3e4..f18e89db 100644 --- a/GPy/models/__init__.py +++ b/GPy/models/__init__.py @@ -1,15 +1,12 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
# Licensed under the BSD 3-clause license (see LICENSE.txt) - -from GP_regression import GP_regression -from GP_classification import GP_classification -from sparse_GP_regression import sparse_GP_regression -from sparse_GP_classification import sparse_GP_classification -from GPLVM import GPLVM -from warped_GP import warpedGP -from sparse_GPLVM import sparse_GPLVM -from Bayesian_GPLVM import Bayesian_GPLVM +from gp_regression import GPRegression +from gp_classification import GPClassification +from sparse_gp_regression import SparseGPRegression +from sparse_gp_classification import SparseGPClassification +from fitc_classification import FITCClassification +from gplvm import GPLVM +from warped_gp import WarpedGP +from bayesian_gplvm import BayesianGPLVM from mrd import MRD -from generalized_FITC import generalized_FITC -from FITC import FITC diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/bayesian_gplvm.py similarity index 90% rename from GPy/models/Bayesian_GPLVM.py rename to GPy/models/bayesian_gplvm.py index e69c4840..8043c635 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/bayesian_gplvm.py @@ -2,21 +2,16 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -import pylab as pb -import sys, pdb -from GPLVM import GPLVM -from ..core import sparse_GP -from GPy.util.linalg import pdinv +from ..core import SparseGP from ..likelihoods import Gaussian from .. 
import kern -from numpy.linalg.linalg import LinAlgError import itertools from matplotlib.colors import colorConverter -from matplotlib.figure import SubplotParams from GPy.inference.optimization import SCG from GPy.util import plot_latent +from GPy.models.gplvm import GPLVM -class Bayesian_GPLVM(sparse_GP, GPLVM): +class BayesianGPLVM(SparseGP, GPLVM): """ Bayesian Gaussian Process Latent Variable Model @@ -28,7 +23,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): :type init: 'PCA'|'random' """ - def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', M=10, + def __init__(self, likelihood_or_Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, Z=None, kernel=None, oldpsave=10, _debug=False, **kwargs): if type(likelihood_or_Y) is np.ndarray: @@ -44,7 +39,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1) if Z is None: - Z = np.random.permutation(X.copy())[:M] + Z = np.random.permutation(X.copy())[:num_inducing] assert Z.shape[1] == X.shape[1] if kernel is None: @@ -64,7 +59,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): self._savedpsiKmm = [] self._savedABCD = [] - sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) + SparseGP.__init__(self, X, likelihood, kernel, Z=Z, X_variance=X_variance, **kwargs) self._set_params(self._get_params()) @property @@ -78,21 +73,21 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): self._oldps.insert(0, p.copy()) def _get_param_names(self): - X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) - S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) - return (X_names + S_names + sparse_GP._get_param_names(self)) + X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for 
n in range(self.num_data)], []) + return (X_names + S_names + SparseGP._get_param_names(self)) def _get_params(self): """ Horizontally stacks the parameters in order to present them to the optimizer. - The resulting 1-D array has this structure: + The resulting 1-input_dim array has this structure: =============================================================== | mu | S | Z | theta | beta | =============================================================== """ - x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self))) + x = np.hstack((self.X.flatten(), self.X_variance.flatten(), SparseGP._get_params(self))) return x def _clipped(self, x): @@ -101,10 +96,10 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def _set_params(self, x, save_old=True, save_count=0): # try: x = self._clipped(x) - N, input_dim = self.N, self.input_dim + N, input_dim = self.num_data, self.input_dim self.X = x[:self.X.size].reshape(N, input_dim).copy() self.X_variance = x[(N * input_dim):(2 * N * input_dim)].reshape(N, input_dim).copy() - sparse_GP._set_params(self, x[(2 * N * input_dim):]) + SparseGP._set_params(self, x[(2 * N * input_dim):]) # self.oldps = x # except (LinAlgError, FloatingPointError, ZeroDivisionError): # print "\rWARNING: Caught LinAlgError, continueing without setting " @@ -131,10 +126,10 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def KL_divergence(self): var_mean = np.square(self.X).sum() var_S = np.sum(self.X_variance - np.log(self.X_variance)) - return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.N + return 0.5 * (var_mean + var_S) - 0.5 * self.input_dim * self.num_data def log_likelihood(self): - ll = sparse_GP.log_likelihood(self) + ll = SparseGP.log_likelihood(self) kl = self.KL_divergence() # if ll < -2E4: @@ -151,14 +146,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]]) # sf2 = self.scale_factor ** 2 if self.likelihood.is_heteroscedastic: - A = -0.5 * self.N * self.D * 
np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) -# B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) - B = -0.5 * self.D * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) + A = -0.5 * self.num_data * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.V * self.likelihood.Y) +# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A) * sf2) + B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) else: - A = -0.5 * self.N * self.D * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT -# B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) - B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) - C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) + A = -0.5 * self.num_data * self.input_dim * (np.log(2.*np.pi) + np.log(self.likelihood._variance)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT +# B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A) * sf2) + B = -0.5 * self.input_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) + C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) self._savedABCD.append([self.f_call, A, B, C, D]) @@ -181,7 +176,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): # d_dS = (dL_dS).flatten() # ======================== self.dbound_dmuS = np.hstack((d_dmu, d_dS)) - self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self) + self.dbound_dZtheta = SparseGP._log_likelihood_gradients(self) return 
self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))) def plot_latent(self, *args, **kwargs): @@ -200,7 +195,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): means = np.zeros((N_test, input_dim)) covars = np.zeros((N_test, input_dim)) - dpsi0 = -0.5 * self.D * self.likelihood.precision + dpsi0 = -0.5 * self.input_dim * self.likelihood.precision dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods V = self.likelihood.precision * Y dpsi1 = np.dot(self.Cpsi1V, V.T) @@ -263,7 +258,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def __getstate__(self): return (self.likelihood, self.input_dim, self.X, self.X_variance, - self.init, self.M, self.Z, self.kern, + self.init, self.num_inducing, self.Z, self.kern, self.oldpsave, self._debug) def __setstate__(self, state): @@ -271,11 +266,11 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): def _debug_filter_params(self, x): start, end = 0, self.X.size, - X = x[start:end].reshape(self.N, self.input_dim) + X = x[start:end].reshape(self.num_data, self.input_dim) start, end = end, end + self.X_variance.size - X_v = x[start:end].reshape(self.N, self.input_dim) - start, end = end, end + (self.M * self.input_dim) - Z = x[start:end].reshape(self.M, self.input_dim) + X_v = x[start:end].reshape(self.num_data, self.input_dim) + start, end = end, end + (self.num_inducing * self.input_dim) + Z = x[start:end].reshape(self.num_inducing, self.input_dim) start, end = end, end + self.input_dim theta = x[start:] return X, X_v, Z, theta @@ -353,12 +348,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6))) fig = figs[-1] ax8 = fig.add_subplot(121) - ax8.text(.5, .5, r"${\mathbf{A,B,C,D}}$", color='k', alpha=.5, transform=ax8.transAxes, + ax8.text(.5, .5, r"${\mathbf{A,B,C,input_dim}}$", color='k', alpha=.5, transform=ax8.transAxes, ha='center', va='center') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 1], label='A') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 
2], label='B') ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 3], label='C') - ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='D') + ax8.plot(ABCD_dict[:, 0], ABCD_dict[:, 4], label='input_dim') ax8.legend() figs[-1].canvas.draw() figs[-1].tight_layout(rect=(.15, 0, 1, .86)) diff --git a/GPy/models/fitc.py b/GPy/models/fitc.py new file mode 100644 index 00000000..5df1a7b5 --- /dev/null +++ b/GPy/models/fitc.py @@ -0,0 +1,252 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +import numpy as np +import pylab as pb +from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify, pdinv +from ..util.plot import gpplot +from .. import kern +from scipy import stats, linalg +from GPy.core.sparse_gp import SparseGP + +def backsub_both_sides(L, X): + """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" + tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1) + return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T + +class FITC(SparseGP): + + def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): + super(FITC, self).__init__(X, likelihood, kernel, normalize_X=normalize_X) + + def update_likelihood_approximation(self): + """ + Approximates a non-gaussian likelihood using Expectation Propagation + + For a Gaussian (or direct: TODO) likelihood, no iteration is required: + this function does nothing + + Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in SparseGP. + The true precison is now 'true_precision' not 'precision'. 
+ """ + if self.has_uncertain_inputs: + raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" + else: + self.likelihood.fit_FITC(self.Kmm, self.psi1, self.psi0) + self._set_params(self._get_params()) # update the GP + + def _computations(self): + + # factor Kmm + self.Lm = jitchol(self.Kmm) + self.Lmi, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.eye(self.num_inducing), lower=1) + Lmipsi1 = np.dot(self.Lmi, self.psi1) + self.Qnn = np.dot(Lmipsi1.T, Lmipsi1).copy() + self.Diag0 = self.psi0 - np.diag(self.Qnn) + self.beta_star = self.likelihood.precision / (1. + self.likelihood.precision * self.Diag0[:, None]) # Includes Diag0 in the precision + self.V_star = self.beta_star * self.likelihood.Y + + # The rather complex computations of self.A + if self.has_uncertain_inputs: + raise NotImplementedError + else: + if self.likelihood.is_heteroscedastic: + assert self.likelihood.input_dim == 1 + tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data))) + tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) + self.A = tdot(tmp) + + # factor B + self.B = np.eye(self.num_inducing) + self.A + self.LB = jitchol(self.B) + self.LBi = chol_inv(self.LB) + self.psi1V = np.dot(self.psi1, self.V_star) + + Lmi_psi1V, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0) + self._LBi_Lmi_psi1V, _ = linalg.lapack.flapack.dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0) + + Kmmipsi1 = np.dot(self.Lmi.T, Lmipsi1) + b_psi1_Ki = self.beta_star * Kmmipsi1.T + Ki_pbp_Ki = np.dot(Kmmipsi1, b_psi1_Ki) + Kmmi = np.dot(self.Lmi.T, self.Lmi) + LBiLmi = np.dot(self.LBi, self.Lmi) + LBL_inv = np.dot(LBiLmi.T, LBiLmi) + VVT = np.outer(self.V_star, self.V_star) + VV_p_Ki = np.dot(VVT, Kmmipsi1.T) + Ki_pVVp_Ki = np.dot(Kmmipsi1, VV_p_Ki) + psi1beta = self.psi1 * self.beta_star.T + H = self.Kmm + mdot(self.psi1, psi1beta.T) + LH = jitchol(H) + LHi = chol_inv(LH) + Hi = 
np.dot(LHi.T, LHi) + + betapsi1TLmiLBi = np.dot(psi1beta.T, LBiLmi.T) + alpha = np.array([np.dot(a.T, a) for a in betapsi1TLmiLBi])[:, None] + gamma_1 = mdot(VVT, self.psi1.T, Hi) + pHip = mdot(self.psi1.T, Hi, self.psi1) + gamma_2 = mdot(self.beta_star * pHip, self.V_star) + gamma_3 = self.V_star * gamma_2 + + self._dL_dpsi0 = -0.5 * self.beta_star # dA_dpsi0: logdet(self.beta_star) + self._dL_dpsi0 += .5 * self.V_star ** 2 # dA_psi0: yT*beta_star*y + self._dL_dpsi0 += .5 * alpha # dC_dpsi0 + self._dL_dpsi0 += 0.5 * mdot(self.beta_star * pHip, self.V_star) ** 2 - self.V_star * mdot(self.V_star.T, pHip * self.beta_star).T # dD_dpsi0 + + self._dL_dpsi1 = b_psi1_Ki.copy() # dA_dpsi1: logdet(self.beta_star) + self._dL_dpsi1 += -np.dot(psi1beta.T, LBL_inv) # dC_dpsi1 + self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T, Hi, self.psi1, gamma_1) # dD_dpsi1 + + self._dL_dKmm = -0.5 * np.dot(Kmmipsi1, b_psi1_Ki) # dA_dKmm: logdet(self.beta_star) + self._dL_dKmm += .5 * (LBL_inv - Kmmi) + mdot(LBL_inv, psi1beta, Kmmipsi1.T) # dC_dKmm + self._dL_dKmm += -.5 * mdot(Hi, self.psi1, gamma_1) # dD_dKmm + + self._dpsi1_dtheta = 0 + self._dpsi1_dX = 0 + self._dKmm_dtheta = 0 + self._dKmm_dX = 0 + + self._dpsi1_dX_jkj = 0 + self._dpsi1_dtheta_jkj = 0 + + for i, V_n, alpha_n, gamma_n, gamma_k in zip(range(self.num_data), self.V_star, alpha, gamma_2, gamma_3): + K_pp_K = np.dot(Kmmipsi1[:, i:(i + 1)], Kmmipsi1[:, i:(i + 1)].T) + + # Diag_dpsi1 = Diag_dA_dpsi1: yT*beta_star*y + Diag_dC_dpsi1 +Diag_dD_dpsi1 + _dpsi1 = (-V_n ** 2 - alpha_n + 2.*gamma_k - gamma_n ** 2) * Kmmipsi1.T[i:(i + 1), :] + + # Diag_dKmm = Diag_dA_dKmm: yT*beta_star*y +Diag_dC_dKmm +Diag_dD_dKmm + _dKmm = .5 * (V_n ** 2 + alpha_n + gamma_n ** 2 - 2.*gamma_k) * K_pp_K # Diag_dD_dKmm + + self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1, self.X[i:i + 1, :], self.Z) + self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm, self.Z) + + self._dKmm_dX += 2.*self.kern.dK_dX(_dKmm , self.Z) + self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T, 
self.Z, self.X[i:i + 1, :]) + + # the partial derivative vector for the likelihood + if self.likelihood.Nparams == 0: + # save computation here. + self.partial_for_likelihood = None + elif self.likelihood.is_heteroscedastic: + raise NotImplementedError, "heteroscedatic derivates not implemented" + else: + # likelihood is not heterscedatic + dbstar_dnoise = self.likelihood.precision * (self.beta_star ** 2 * self.Diag0[:, None] - self.beta_star) + Lmi_psi1 = mdot(self.Lmi, self.psi1) + LBiLmipsi1 = np.dot(self.LBi, Lmi_psi1) + aux_0 = np.dot(self._LBi_Lmi_psi1V.T, LBiLmipsi1) + aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T, LBiLmipsi1) + aux_2 = np.dot(LBiLmipsi1.T, self._LBi_Lmi_psi1V) + + dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise / self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y ** 2 * dbstar_dnoise) + dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T, self.LBi, Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) + dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T, self.LBi, Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) + + dD_dnoise_1 = mdot(self.V_star * LBiLmipsi1.T, LBiLmipsi1 * dbstar_dnoise.T * self.likelihood.Y.T) + alpha = mdot(LBiLmipsi1, self.V_star) + alpha_ = mdot(LBiLmipsi1.T, alpha) + dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_ ** 2 * dbstar_dnoise) + + dD_dnoise_1 = mdot(self.V_star.T, self.psi1.T, self.Lmi.T, self.LBi.T, self.LBi, self.Lmi, self.psi1, dbstar_dnoise * self.likelihood.Y) + dD_dnoise_2 = 0.5 * mdot(self.V_star.T, self.psi1.T, Hi, self.psi1, dbstar_dnoise * self.psi1.T, Hi, self.psi1, self.V_star) + dD_dnoise = dD_dnoise_1 + dD_dnoise_2 + + self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise + + def log_likelihood(self): + """ Compute the (lower bound on the) log marginal likelihood """ + A = -0.5 * self.num_data * self.input_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) + C = -self.input_dim * (np.sum(np.log(np.diag(self.LB)))) + D = 0.5 * 
np.sum(np.square(self._LBi_Lmi_psi1V)) + return A + C + D + + def _log_likelihood_gradients(self): + pass + return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood))) + + def dL_dtheta(self): + if self.has_uncertain_inputs: + raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" + else: + dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0, self.X) + dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1, self.X, self.Z) + dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm, X=self.Z) + dL_dtheta += self._dKmm_dtheta + dL_dtheta += self._dpsi1_dtheta + return dL_dtheta + + def dL_dZ(self): + if self.has_uncertain_inputs: + raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" + else: + dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T, self.Z, self.X) + dL_dZ += 2. * self.kern.dK_dX(self._dL_dKmm, X=self.Z) + dL_dZ += self._dpsi1_dX + dL_dZ += self._dKmm_dX + return dL_dZ + + def _raw_predict(self, Xnew, which_parts, full_cov=False): + + if self.likelihood.is_heteroscedastic: + Iplus_Dprod_i = 1. / (1. + self.Diag0 * self.likelihood.precision.flatten()) + self.Diag = self.Diag0 * Iplus_Dprod_i + self.P = Iplus_Dprod_i[:, None] * self.psi1.T + self.RPT0 = np.dot(self.Lmi, self.psi1) + self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0, ((1. 
- Iplus_Dprod_i) / self.Diag0)[:, None] * self.RPT0.T)) + self.R, info = linalg.flapack.dtrtrs(self.L, self.Lmi, lower=1) + self.RPT = np.dot(self.R, self.P.T) + self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T, self.RPT) + self.w = self.Diag * self.likelihood.v_tilde + self.Gamma = np.dot(self.R.T, np.dot(self.RPT, self.likelihood.v_tilde)) + self.mu = self.w + np.dot(self.P, self.Gamma) + + """ + Make a prediction for the generalized FITC model + + Arguments + --------- + X : Input prediction data - Nx1 numpy array (floats) + """ + # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) + + # Ci = I + (RPT0)Di(RPT0).T + # C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T + # = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T + # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T + # = I - V.T * V + U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) + V, info = linalg.flapack.dtrtrs(U, self.RPT0.T, lower=1) + C = np.eye(self.num_inducing) - np.dot(V.T, V) + mu_u = np.dot(C, self.RPT0) * (1. / self.Diag0[None, :]) + # self.C = C + # self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T + # self.mu_u = mu_u + # self.U = U + # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T) + mu_H = np.dot(mu_u, self.mu) + self.mu_H = mu_H + Sigma_H = C + np.dot(mu_u, np.dot(self.Sigma, mu_u.T)) + # q(f_star|y) = N(f_star|mu_star,sigma2_star) + Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) + KR0T = np.dot(Kx.T, self.Lmi.T) + mu_star = np.dot(KR0T, mu_H) + if full_cov: + Kxx = self.kern.K(Xnew, which_parts=which_parts) + var = Kxx + np.dot(KR0T, np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T)) + else: + Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) + var = (Kxx + np.sum(KR0T.T * np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T), 0))[:, None] + return mu_star[:, None], var + else: + raise NotImplementedError, "homoscedastic fitc not implemented" + """ + Kx = self.kern.K(self.Z, Xnew) + mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V) + if full_cov: + Kxx = self.kern.K(Xnew) + var = Kxx - 
mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting + else: + Kxx = self.kern.Kdiag(Xnew) + var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) + return mu,var[:,None] + """ diff --git a/GPy/models/fitc_classification.py b/GPy/models/fitc_classification.py new file mode 100644 index 00000000..4ff441c6 --- /dev/null +++ b/GPy/models/fitc_classification.py @@ -0,0 +1,47 @@ +# Copyright (c) 2013, Ricardo Andrade +# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +from ..core import FITC +from .. import likelihoods +from .. import kern +from ..likelihoods import likelihood + +class FITCClassification(FITC): + """ + FITC approximation for classification + + This is a thin wrapper around the FITC class, with a set of sensible defaults + + :param X: input observations + :param Y: observed values + :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function + :param kernel: a GPy kernel, defaults to rbf+white + :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_X: False|True + :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) + :type normalize_Y: False|True + :rtype: model object + + """ + + def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10): + if kernel is None: + kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) + + if likelihood is None: + distribution = likelihoods.likelihood_functions.Binomial() + likelihood = likelihoods.EP(Y, distribution) + elif Y is not None: + if not all(Y.flatten() == likelihood.data.flatten()): + raise Warning, 'likelihood.data and Y are different.' 
+ + if Z is None: + i = np.random.permutation(X.shape[0])[:M] + Z = X[i].copy() + else: + assert Z.shape[1]==X.shape[1] + + FITC.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) + self._set_params(self._get_params()) diff --git a/GPy/models/generalized_FITC.py b/GPy/models/generalized_fitc.py similarity index 52% rename from GPy/models/generalized_FITC.py rename to GPy/models/generalized_fitc.py index 6e44baf6..70fedcbc 100644 --- a/GPy/models/generalized_FITC.py +++ b/GPy/models/generalized_fitc.py @@ -2,20 +2,17 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) import numpy as np -import pylab as pb -from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot -from ..util.plot import gpplot -from .. import kern -from scipy import stats, linalg -from ..core import sparse_GP +from scipy import linalg +from GPy.core.sparse_gp import SparseGP +from GPy.util.linalg import mdot -def backsub_both_sides(L,X): +def backsub_both_sides(L, X): """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" - tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1) - return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T + tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1) + return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T -class generalized_FITC(sparse_GP): +class GeneralizedFITC(SparseGP): """ Naish-Guzman, A. and Holden, S. (2008) implemantation of EP with FITC. @@ -28,25 +25,26 @@ class generalized_FITC(sparse_GP): :param X_variance: The variance in the measurements of X (Gaussian variance) :type X_variance: np.ndarray (N x input_dim) | None :param Z: inducing inputs (optional, see note) - :type Z: np.ndarray (M x input_dim) | None - :param M : Number of inducing points (optional, default 10. 
Ignored if Z is not None) - :type M: int + :type Z: np.ndarray (num_inducing x input_dim) | None + :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None) + :type num_inducing: int :param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales) :type normalize_(X|Y): bool """ def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): + self.Z = Z - self.M = self.Z.shape[0] + self.num_inducing = self.Z.shape[0] self.true_precision = likelihood.precision - super(generalized_FITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X) + super(GeneralizedFITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X) self._set_params(self._get_params()) def _set_params(self, p): - self.Z = p[:self.M*self.input_dim].reshape(self.M, self.input_dim) - self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) - self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) + self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim) + self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params]) + self.likelihood._set_params(p[self.Z.size + self.kern.num_params:]) self._compute_kernel_matrices() self._computations() self._FITC_computations() @@ -58,15 +56,15 @@ class generalized_FITC(sparse_GP): For a Gaussian (or direct: TODO) likelihood, no iteration is required: this function does nothing - Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP. + Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in SparseGP. The true precison is now 'true_precision' not 'precision'. 
""" if self.has_uncertain_inputs: raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" else: - self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) + self.likelihood.fit_FITC(self.Kmm, self.psi1, self.psi0) self.true_precision = self.likelihood.precision # Save the true precision - self.likelihood.precision = self.true_precision/(1. + self.true_precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation + self.likelihood.precision = self.true_precision / (1. + self.true_precision * self.Diag0[:, None]) # Add the diagonal element of the FITC approximation self._set_params(self._get_params()) # update the GP def _FITC_computations(self): @@ -75,40 +73,40 @@ class generalized_FITC(sparse_GP): but adds a diagonal term to the covariance matrix: diag(Knn - Qnn). This function: - computes the FITC diagonal term - - removes the extra terms computed in the sparse_GP approximation + - removes the extra terms computed in the SparseGP approximation - computes the likelihood gradients wrt the true precision. """ - #NOTE the true precison is now 'true_precision' not 'precision' + # NOTE the true precison is now 'true_precision' not 'precision' if self.likelihood.is_heteroscedastic: # Compute generalized FITC's diagonal term of the covariance - self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1) - Lmipsi1 = np.dot(self.Lmi,self.psi1) - self.Qnn = np.dot(Lmipsi1.T,Lmipsi1) - #self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) - #self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) - #a = kj + self.Lmi, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.eye(self.num_inducing), lower=1) + Lmipsi1 = np.dot(self.Lmi, self.psi1) + self.Qnn = np.dot(Lmipsi1.T, Lmipsi1) + # self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) + # self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) + # a = kj self.Diag0 = self.psi0 - np.diag(self.Qnn) - Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.true_precision.flatten()) + Iplus_Dprod_i = 1. 
/ (1. + self.Diag0 * self.true_precision.flatten()) self.Diag = self.Diag0 * Iplus_Dprod_i - self.P = Iplus_Dprod_i[:,None] * self.psi1.T - self.RPT0 = np.dot(self.Lmi,self.psi1) - self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) - self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) + self.P = Iplus_Dprod_i[:, None] * self.psi1.T + self.RPT0 = np.dot(self.Lmi, self.psi1) + self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0, ((1. - Iplus_Dprod_i) / self.Diag0)[:, None] * self.RPT0.T)) + self.R, info = linalg.lapack.dtrtrs(self.L, self.Lmi, lower=1) + self.RPT = np.dot(self.R, self.P.T) + self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T, self.RPT) self.w = self.Diag * self.likelihood.v_tilde - self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde)) - self.mu = self.w + np.dot(self.P,self.Gamma) + self.Gamma = np.dot(self.R.T, np.dot(self.RPT, self.likelihood.v_tilde)) + self.mu = self.w + np.dot(self.P, self.Gamma) # Remove extra term from dL_dpsi1 - self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N)) + self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1 * self.likelihood.precision.flatten().reshape(1,self.num_data)) #self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) - #self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB + #self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.num_data)) #dB #########333333 - #self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) + # self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) #########333333 @@ -116,16 +114,16 @@ class generalized_FITC(sparse_GP): else: raise NotImplementedError, "homoscedastic fitc not implemented" # Remove extra term from dL_dpsi1 - #self.dL_dpsi1 += 
-mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB + # self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB sf = self.scale_factor - sf2 = sf**2 + sf2 = sf ** 2 # Remove extra term from dL_dKmm - self.dL_dKmm += 0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB + self.dL_dKmm += 0.5 * self.input_dim * mdot(self.Lmi.T, self.A, self.Lmi) * sf2 # dB self.dL_dpsi0 = None - #the partial derivative vector for the likelihood + # the partial derivative vector for the likelihood if self.likelihood.Nparams == 0: self.partial_for_likelihood = None elif self.likelihood.is_heteroscedastic: @@ -133,8 +131,8 @@ class generalized_FITC(sparse_GP): else: raise NotImplementedError, "homoscedastic derivatives not implemented" #likelihood is not heterscedatic - #self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 - #self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision + #self.partial_for_likelihood = - 0.5 * self.num_data*self.input_dim*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 + #self.partial_for_likelihood += 0.5 * self.input_dim * trace_dot(self.Bi,self.A)*self.likelihood.precision #self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) #TODO partial derivative vector for the likelihood not implemented @@ -142,28 +140,28 @@ class generalized_FITC(sparse_GP): """ Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel """ - dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) + dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) if self.has_uncertain_inputs: raise NotImplementedError, "heteroscedatic derivates not implemented" else: - #NOTE in sparse_GP this would include the gradient wrt psi0 - dL_dtheta += 
self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) + # NOTE in SparseGP this would include the gradient wrt psi0 + dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X) return dL_dtheta def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ - sf2 = self.scale_factor**2 + sf2 = self.scale_factor ** 2 if self.likelihood.is_heteroscedastic: - A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) + A = -0.5*self.num_data*self.input_dim*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) else: - A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT - C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2)) - #C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) + A = -0.5*self.num_data*self.input_dim*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT + C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.num_inducing*np.log(sf2)) + #C = -0.5*self.input_dim * (self.B_logdet + self.num_inducing*np.log(sf2)) D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V)) #self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) #D_ = 0.5*np.trace(self.Cpsi1VVpsi1) - return A+C+D + return A + C + D def _raw_predict(self, Xnew, which_parts, full_cov=False): if self.likelihood.is_heteroscedastic: @@ -177,35 +175,35 @@ class generalized_FITC(sparse_GP): # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) # Ci = I + (RPT0)Di(RPT0).T - # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T - # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T + # C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T + # = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T # = I - V.T * V U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) - V,info = 
linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) - C = np.eye(self.M) - np.dot(V.T,V) - mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) - #self.C = C - #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T - #self.mu_u = mu_u - #self.U = U + V, info = linalg.flapack.dtrtrs(U, self.RPT0.T, lower=1) + C = np.eye(self.num_inducing) - np.dot(V.T, V) + mu_u = np.dot(C, self.RPT0) * (1. / self.Diag0[None, :]) + # self.C = C + # self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T + # self.mu_u = mu_u + # self.U = U # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T) - mu_H = np.dot(mu_u,self.mu) + mu_H = np.dot(mu_u, self.mu) self.mu_H = mu_H - Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T)) + Sigma_H = C + np.dot(mu_u, np.dot(self.Sigma, mu_u.T)) # q(f_star|y) = N(f_star|mu_star,sigma2_star) Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) - KR0T = np.dot(Kx.T,self.Lmi.T) - mu_star = np.dot(KR0T,mu_H) + KR0T = np.dot(Kx.T, self.Lmi.T) + mu_star = np.dot(KR0T, mu_H) if full_cov: - Kxx = self.kern.K(Xnew,which_parts=which_parts) - var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) + Kxx = self.kern.K(Xnew, which_parts=which_parts) + var = Kxx + np.dot(KR0T, np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T)) else: - Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) - Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed? - var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed? - var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None] - return mu_star[:,None],var + Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) + Kxx_ = self.kern.K(Xnew, which_parts=which_parts) # TODO: RA, is this line needed? + var_ = Kxx_ + np.dot(KR0T, np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T)) # TODO: RA, is this line needed? 
+ var = (Kxx + np.sum(KR0T.T * np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T), 0))[:, None] + return mu_star[:, None], var else: raise NotImplementedError, "homoscedastic fitc not implemented" """ diff --git a/GPy/models/GP_classification.py b/GPy/models/gp_classification.py similarity index 88% rename from GPy/models/GP_classification.py rename to GPy/models/gp_classification.py index 2b47aa08..376f0005 100644 --- a/GPy/models/GP_classification.py +++ b/GPy/models/gp_classification.py @@ -7,15 +7,15 @@ from ..core import GP from .. import likelihoods from .. import kern -class GP_classification(GP): +class GPClassification(GP): """ Gaussian Process classification - This is a thin wrapper around the models.GP class, with a set of sensible defalts + This is a thin wrapper around the models.GP class, with a set of sensible defaults :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to binomial with probit link_function + :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function :param kernel: a GPy kernel, defaults to rbf :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -31,7 +31,7 @@ class GP_classification(GP): kernel = kern.rbf(X.shape[1]) if likelihood is None: - distribution = likelihoods.likelihood_functions.binomial() + distribution = likelihoods.likelihood_functions.Binomial() likelihood = likelihoods.EP(Y, distribution) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): diff --git a/GPy/models/GP_regression.py b/GPy/models/gp_regression.py similarity index 96% rename from GPy/models/GP_regression.py rename to GPy/models/gp_regression.py index d19ebc5b..8d0b02e0 100644 --- a/GPy/models/GP_regression.py +++ b/GPy/models/gp_regression.py @@ -7,11 +7,11 @@ from ..core import GP from .. import likelihoods from .. 
import kern -class GP_regression(GP): +class GPRegression(GP): """ Gaussian Process model for regression - This is a thin wrapper around the models.GP class, with a set of sensible defalts + This is a thin wrapper around the models.GP class, with a set of sensible defaults :param X: input observations :param Y: observed values diff --git a/GPy/models/GPLVM.py b/GPy/models/gplvm.py similarity index 92% rename from GPy/models/GPLVM.py rename to GPy/models/gplvm.py index 5589304a..e602a59a 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/gplvm.py @@ -6,7 +6,7 @@ import numpy as np import pylab as pb import sys, pdb from .. import kern -from ..core import model +from ..core import Model from ..util.linalg import pdinv, PCA from ..core import GP from ..likelihoods import Gaussian @@ -42,13 +42,13 @@ class GPLVM(GP): return np.random.randn(Y.shape[0], input_dim) def _get_param_names(self): - return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) + GP._get_param_names(self) + return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.num_data)],[]) + GP._get_param_names(self) def _get_params(self): return np.hstack((self.X.flatten(), GP._get_params(self))) def _set_params(self,x): - self.X = x[:self.N*self.input_dim].reshape(self.N,self.input_dim).copy() + self.X = x[:self.num_data*self.input_dim].reshape(self.num_data,self.input_dim).copy() GP._set_params(self, x[self.X.size:]) def _log_likelihood_gradients(self): diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index e91298aa..b078fd27 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -3,17 +3,16 @@ Created on 10 Apr 2013 @author: Max Zwiessele ''' -from GPy.core import model -from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM -from GPy.core import sparse_GP +from GPy.core import Model +from GPy.core import SparseGP from GPy.util.linalg import PCA -from scipy import linalg import numpy import itertools import pylab from GPy.kern.kern import kern +from 
GPy.models.bayesian_gplvm import BayesianGPLVM -class MRD(model): +class MRD(Model): """ Do MRD on given Datasets in Ylist. All Ys in likelihood_list are in [N x Dn], where Dn can be different per Yn, @@ -34,18 +33,18 @@ class MRD(model): :param X_variance: Initial latent space variance :param init: [cooncat|single|random] - initialization method to use: + initialization method to use: *concat: PCA on concatenated outputs *single: PCA on each output *random: random - :param M: + :param num_inducing: number of inducing inputs to use :param Z: initial inducing inputs :param kernels: list of kernels or kernel shared for all BGPLVMS :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default) """ - def __init__(self, likelihood_or_Y_list, input_dim, M=10, names=None, + def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None, kernels=None, initx='PCA', initz='permute', _debug=False, **kw): if names is None: @@ -62,24 +61,24 @@ class MRD(model): assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.input_dim = input_dim - self.M = M + self.num_inducing = num_inducing self._debug = _debug self._init = True X = self._init_X(initx, likelihood_or_Y_list) Z = self._init_Z(initz, X) - self.bgplvms = [Bayesian_GPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] + self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, num_inducing=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] del self._init self.gref = self.bgplvms[0] - nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms]) + nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms]) self.nparams = nparams.cumsum() - self.N = self.gref.N - self.NQ = self.N * self.input_dim - self.MQ = self.M * self.input_dim + self.num_data = self.gref.num_data + self.NQ = self.num_data * self.input_dim + self.MQ = 
self.num_inducing * self.input_dim - model.__init__(self) # @UndefinedVariable + Model.__init__(self) # @UndefinedVariable self._set_params(self._get_params()) @property @@ -143,15 +142,15 @@ class MRD(model): self._init_Z(initz, self.X) def _get_param_names(self): - # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) - # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) + # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) n1 = self.gref._get_param_names() n1var = n1[:self.NQ * 2 + self.MQ] map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x), itertools.izip(ns, itertools.repeat(name))) return list(itertools.chain(n1var, *(map_names(\ - sparse_GP._get_param_names(g)[self.MQ:], n) \ + SparseGP._get_param_names(g)[self.MQ:], n) \ for g, n in zip(self.bgplvms, self.names)))) def _get_params(self): @@ -165,14 +164,14 @@ class MRD(model): X = self.gref.X.ravel() X_var = self.gref.X_variance.ravel() Z = self.gref.Z.ravel() - thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms] + thetas = [SparseGP._get_params(g)[g.Z.size:] for g in self.bgplvms] params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)]) return params # def _set_var_params(self, g, X, X_var, Z): -# g.X = X.reshape(self.N, self.input_dim) -# g.X_variance = X_var.reshape(self.N, self.input_dim) -# g.Z = Z.reshape(self.M, self.input_dim) +# g.X = X.reshape(self.num_data, self.input_dim) +# g.X_variance = X_var.reshape(self.num_data, self.input_dim) +# g.Z = Z.reshape(self.num_inducing, self.input_dim) # # def _set_kern_params(self, g, p): # g.kern._set_params(p[:g.kern.Nparam]) @@ -206,7 +205,7 @@ class MRD(model): def log_likelihood(self): ll = -self.gref.KL_divergence() for g in self.bgplvms: - ll += 
sparse_GP.log_likelihood(g) + ll += SparseGP.log_likelihood(g) return ll def _log_likelihood_gradients(self): @@ -215,7 +214,7 @@ class MRD(model): dLdmu -= dKLmu dLdS -= dKLdS dLdmuS = numpy.hstack((dLdmu.flatten(), dLdS.flatten())).flatten() - dldzt1 = reduce(lambda a, b: a + b, (sparse_GP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms)) + dldzt1 = reduce(lambda a, b: a + b, (SparseGP._log_likelihood_gradients(g)[:self.MQ] for g in self.bgplvms)) return numpy.hstack((dLdmuS, dldzt1, @@ -250,9 +249,9 @@ class MRD(model): if X is None: X = self.X if init in "permute": - Z = numpy.random.permutation(X.copy())[:self.M] + Z = numpy.random.permutation(X.copy())[:self.num_inducing] elif init in "random": - Z = numpy.random.randn(self.M, self.input_dim) * X.var() + Z = numpy.random.randn(self.num_inducing, self.input_dim) * X.var() self.Z = Z return Z @@ -274,8 +273,8 @@ class MRD(model): else: return pylab.gcf() - def plot_X_1d(self): - return self.gref.plot_X_1d() + def plot_X_1d(self, *a, **kw): + return self.gref.plot_X_1d(*a, **kw) def plot_X(self, fignum=None, ax=None): fig = self._handle_plotting(fignum, ax, lambda i, g, ax: ax.imshow(g.X)) diff --git a/GPy/models/sparse_GPLVM.py b/GPy/models/sparse_GPLVM.py deleted file mode 100644 index ce71cd9b..00000000 --- a/GPy/models/sparse_GPLVM.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2012, GPy authors (see AUTHORS.txt). -# Licensed under the BSD 3-clause license (see LICENSE.txt) - - -import numpy as np -import pylab as pb -import sys, pdb -# from .. 
import kern -# from ..core import model -# from ..util.linalg import pdinv, PCA -from GPLVM import GPLVM -from sparse_GP_regression import sparse_GP_regression - -class sparse_GPLVM(sparse_GP_regression, GPLVM): - """ - Sparse Gaussian Process Latent Variable Model - - :param Y: observed data - :type Y: np.ndarray - :param input_dim: latent dimensionality - :type input_dim: int - :param init: initialisation method for the latent space - :type init: 'PCA'|'random' - - """ - def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10): - X = self.initialise_latent(init, input_dim, Y) - sparse_GP_regression.__init__(self, X, Y, kernel=kernel,M=M) - - def _get_param_names(self): - return (sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) - + sparse_GP_regression._get_param_names(self)) - - def _get_params(self): - return np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self))) - - def _set_params(self,x): - self.X = x[:self.X.size].reshape(self.N,self.input_dim).copy() - sparse_GP_regression._set_params(self, x[self.X.size:]) - - def log_likelihood(self): - return sparse_GP_regression.log_likelihood(self) - - def dL_dX(self): - dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X) - dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z) - - return dL_dX - - def _log_likelihood_gradients(self): - return np.hstack((self.dL_dX().flatten(), sparse_GP_regression._log_likelihood_gradients(self))) - - def plot(self): - GPLVM.plot(self) - #passing Z without a small amout of jitter will induce the white kernel where we don;t want it! 
- mu, var, upper, lower = sparse_GP_regression.predict(self, self.Z+np.random.randn(*self.Z.shape)*0.0001) - pb.plot(mu[:, 0] , mu[:, 1], 'ko') - - def plot_latent(self, *args, **kwargs): - input_1, input_2 = GPLVM.plot_latent(*args, **kwargs) - pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w') diff --git a/GPy/models/sparse_GP_classification.py b/GPy/models/sparse_gp_classification.py similarity index 72% rename from GPy/models/sparse_GP_classification.py rename to GPy/models/sparse_gp_classification.py index a3412ea2..9027ef07 100644 --- a/GPy/models/sparse_GP_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -3,21 +3,20 @@ import numpy as np -from ..core import sparse_GP +from ..core import SparseGP from .. import likelihoods from .. import kern from ..likelihoods import likelihood -from GP_regression import GP_regression -class sparse_GP_classification(sparse_GP): +class SparseGPClassification(SparseGP): """ sparse Gaussian Process model for classification - This is a thin wrapper around the sparse_GP class, with a set of sensible defalts + This is a thin wrapper around the sparse_GP class, with a set of sensible defaults :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to binomial with probit link_function + :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function :param kernel: a GPy kernel, defaults to rbf+white :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True @@ -25,26 +24,24 @@ class sparse_GP_classification(sparse_GP): :type normalize_Y: False|True :rtype: model object - .. 
Note:: Multiple independent outputs are allowed using columns of Y - """ - def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10): + def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10): if kernel is None: kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) if likelihood is None: - distribution = likelihoods.likelihood_functions.binomial() + distribution = likelihoods.likelihood_functions.Binomial() likelihood = likelihoods.EP(Y, distribution) elif Y is not None: if not all(Y.flatten() == likelihood.data.flatten()): raise Warning, 'likelihood.data and Y are different.' if Z is None: - i = np.random.permutation(X.shape[0])[:M] + i = np.random.permutation(X.shape[0])[:num_inducing] Z = X[i].copy() else: assert Z.shape[1]==X.shape[1] - sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) + SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X) self._set_params(self._get_params()) diff --git a/GPy/models/sparse_GP_regression.py b/GPy/models/sparse_gp_regression.py similarity index 59% rename from GPy/models/sparse_GP_regression.py rename to GPy/models/sparse_gp_regression.py index 78089ded..432d6e18 100644 --- a/GPy/models/sparse_GP_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -3,17 +3,15 @@ import numpy as np -from ..core import sparse_GP +from ..core import SparseGP from .. import likelihoods from .. 
import kern -from ..likelihoods import likelihood -from GP_regression import GP_regression -class sparse_GP_regression(sparse_GP): +class SparseGPRegression(SparseGP): """ Gaussian Process model for regression - This is a thin wrapper around the sparse_GP class, with a set of sensible defalts + This is a thin wrapper around the SparseGP class, with a set of sensible defalts :param X: input observations :param Y: observed values @@ -28,20 +26,20 @@ class sparse_GP_regression(sparse_GP): """ - def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None): - #kern defaults to rbf (plus white for stability) + def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10, X_variance=None): + # kern defaults to rbf (plus white for stability) if kernel is None: - kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) + kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3) - #Z defaults to a subset of the data + # Z defaults to a subset of the data if Z is None: - i = np.random.permutation(X.shape[0])[:M] + i = np.random.permutation(X.shape[0])[:num_inducing] Z = X[i].copy() else: - assert Z.shape[1]==X.shape[1] + assert Z.shape[1] == X.shape[1] - #likelihood defaults to Gaussian - likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y) + # likelihood defaults to Gaussian + likelihood = likelihoods.Gaussian(Y, normalize=normalize_Y) - sparse_GP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance) + SparseGP.__init__(self, X, likelihood, kernel, Z=Z, normalize_X=normalize_X, X_variance=X_variance) self._set_params(self._get_params()) diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py new file mode 100644 index 00000000..76fe65f1 --- /dev/null +++ b/GPy/models/sparse_gplvm.py @@ -0,0 +1,61 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). 
+# Licensed under the BSD 3-clause license (see LICENSE.txt) + + +import numpy as np +import pylab as pb +import sys, pdb +from GPy.models.sparse_gp_regression import SparseGPRegression +from GPy.models.gplvm import GPLVM +# from .. import kern +# from ..core import model +# from ..util.linalg import pdinv, PCA + +class SparseGPLVM(SparseGPRegression, GPLVM): + """ + Sparse Gaussian Process Latent Variable Model + + :param Y: observed data + :type Y: np.ndarray + :param input_dim: latent dimensionality + :type input_dim: int + :param init: initialisation method for the latent space + :type init: 'PCA'|'random' + + """ + def __init__(self, Y, input_dim, kernel=None, init='PCA', num_inducing=10): + X = self.initialise_latent(init, input_dim, Y) + SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) + + def _get_param_names(self): + return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + + SparseGPRegression._get_param_names(self)) + + def _get_params(self): + return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self))) + + def _set_params(self, x): + self.X = x[:self.X.size].reshape(self.num_data, self.input_dim).copy() + SparseGPRegression._set_params(self, x[self.X.size:]) + + def log_likelihood(self): + return SparseGPRegression.log_likelihood(self) + + def dL_dX(self): + dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X) + dL_dX += self.kern.dK_dX(self.dL_dpsi1.T, self.X, self.Z) + + return dL_dX + + def _log_likelihood_gradients(self): + return np.hstack((self.dL_dX().flatten(), SparseGPRegression._log_likelihood_gradients(self))) + + def plot(self): + GPLVM.plot(self) + # passing Z without a small amout of jitter will induce the white kernel where we don;t want it! 
+ mu, var, upper, lower = SparseGPRegression.predict(self, self.Z + np.random.randn(*self.Z.shape) * 0.0001) + pb.plot(mu[:, 0] , mu[:, 1], 'ko') + + def plot_latent(self, *args, **kwargs): + input_1, input_2 = GPLVM.plot_latent(*args, **kwargs) + pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w') diff --git a/GPy/models/warped_GP.py b/GPy/models/warped_gp.py similarity index 80% rename from GPy/models/warped_GP.py rename to GPy/models/warped_gp.py index 86a69226..fcef66c6 100644 --- a/GPy/models/warped_GP.py +++ b/GPy/models/warped_gp.py @@ -3,25 +3,21 @@ import numpy as np -from .. import kern -from ..core import model -from ..util.linalg import pdinv -from ..util.plot import gpplot from ..util.warping_functions import * -from GP_regression import GP_regression from ..core import GP from .. import likelihoods -from .. import kern +from GPy.util.warping_functions import TanhWarpingFunction_d +from GPy import kern -class warpedGP(GP): - def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False): +class WarpedGP(GP): + def __init__(self, X, Y, kernel=None, warping_function=None, warping_terms=3, normalize_X=False, normalize_Y=False): if kernel is None: kernel = kern.rbf(X.shape[1]) if warping_function == None: self.warping_function = TanhWarpingFunction_d(warping_terms) - self.warping_params = (np.random.randn(self.warping_function.n_terms*3+1,) * 1) + self.warping_params = (np.random.randn(self.warping_function.n_terms * 3 + 1,) * 1) Y = self._scale_data(Y) self.has_uncertain_inputs = False @@ -35,10 +31,10 @@ class warpedGP(GP): def _scale_data(self, Y): self._Ymax = Y.max() self._Ymin = Y.min() - return (Y-self._Ymin)/(self._Ymax-self._Ymin) - 0.5 + return (Y - self._Ymin) / (self._Ymax - self._Ymin) - 0.5 def _unscale_data(self, Y): - return (Y + 0.5)*(self._Ymax - self._Ymin) + self._Ymin + return (Y + 0.5) * (self._Ymax - self._Ymin) + self._Ymin def _set_params(self, x): self.warping_params = 
x[:self.warping_function.num_parameters] @@ -68,15 +64,15 @@ class warpedGP(GP): alpha = np.dot(self.Ki, self.likelihood.Y.flatten()) warping_grads = self.warping_function_gradients(alpha) - warping_grads = np.append(warping_grads[:,:-1].flatten(), warping_grads[0,-1]) + warping_grads = np.append(warping_grads[:, :-1].flatten(), warping_grads[0, -1]) return np.hstack((warping_grads.flatten(), ll_grads.flatten())) def warping_function_gradients(self, Kiy): grad_y = self.warping_function.fgrad_y(self.Y_untransformed, self.warping_params) grad_y_psi, grad_psi = self.warping_function.fgrad_y_psi(self.Y_untransformed, self.warping_params, - return_covar_chain = True) - djac_dpsi = ((1.0/grad_y[:,:, None, None])*grad_y_psi).sum(axis=0).sum(axis=0) - dquad_dpsi = (Kiy[:,None,None,None] * grad_psi).sum(axis=0).sum(axis=0) + return_covar_chain=True) + djac_dpsi = ((1.0 / grad_y[:, :, None, None]) * grad_y_psi).sum(axis=0).sum(axis=0) + dquad_dpsi = (Kiy[:, None, None, None] * grad_psi).sum(axis=0).sum(axis=0) return -dquad_dpsi + djac_dpsi diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py index ae72983a..ff558f6d 100644 --- a/GPy/testing/bgplvm_tests.py +++ b/GPy/testing/bgplvm_tests.py @@ -4,70 +4,71 @@ import unittest import numpy as np import GPy +from GPy.models.bayesian_gplvm import BayesianGPLVM class BGPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() 
self.assertTrue(m.checkgrad()) def test_linear_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_bias_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) 
#@unittest.skip('psi2 cross terms are NotImplemented for this combination') def test_linear_bias_kern(self): - N, M, input_dim, D = 30, 5, 4, 30 + N, num_inducing, input_dim, D = 30, 5, 4, 30 X = np.random.rand(N, input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel = k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py index a06f1090..14fa5593 100644 --- a/GPy/testing/examples_tests.py +++ b/GPy/testing/examples_tests.py @@ -9,40 +9,41 @@ import pkgutil import os import random from nose.tools import nottest +import sys class ExamplesTests(unittest.TestCase): - def _checkgrad(self, model): - self.assertTrue(model.checkgrad()) + def _checkgrad(self, Model): + self.assertTrue(Model.checkgrad()) - def _model_instance(self, model): - self.assertTrue(isinstance(model, GPy.models)) + def _model_instance(self, Model): + self.assertTrue(isinstance(Model, GPy.models)) """ -def model_instance_generator(model): +def model_instance_generator(Model): def check_model_returned(self): - self._model_instance(model) + self._model_instance(Model) return check_model_returned -def checkgrads_generator(model): +def checkgrads_generator(Model): def model_checkgrads(self): - self._checkgrad(model) + self._checkgrad(Model) return model_checkgrads """ -def model_checkgrads(model): - model.randomize() - assert model.checkgrad() +def model_checkgrads(Model): + Model.randomize() + assert Model.checkgrad() -def model_instance(model): - assert isinstance(model, GPy.core.model) 
+def model_instance(Model): + assert isinstance(Model, GPy.core.Model) @nottest def test_models(): examples_path = os.path.dirname(GPy.examples.__file__) - #Load modules + # Load modules for loader, module_name, is_pkg in pkgutil.iter_modules([examples_path]): - #Load examples + # Load examples module_examples = loader.find_module(module_name).load_module(module_name) print "MODULE", module_examples print "Before" @@ -56,26 +57,27 @@ def test_models(): continue print "Testing example: ", example[0] - #Generate model - model = example[1]() - print model + # Generate Model + Model = example[1]() + print Model - #Create tests for instance check + # Create tests for instance check """ - test = model_instance_generator(model) + test = model_instance_generator(Model) test.__name__ = 'test_instance_%s' % example[0] setattr(ExamplesTests, test.__name__, test) #Create tests for checkgrads check - test = checkgrads_generator(model) + test = checkgrads_generator(Model) test.__name__ = 'test_checkgrads_%s' % example[0] setattr(ExamplesTests, test.__name__, test) """ model_checkgrads.description = 'test_checkgrads_%s' % example[0] - yield model_checkgrads, model + yield model_checkgrads, Model model_instance.description = 'test_instance_%s' % example[0] - yield model_instance, model + yield model_instance, Model if __name__ == "__main__": print "Running unit tests, please be (very) patient..." 
- unittest.main() + # unittest.main() + test_models() diff --git a/GPy/testing/gplvm_tests.py b/GPy/testing/gplvm_tests.py index b1721a3c..8c2ba9fc 100644 --- a/GPy/testing/gplvm_tests.py +++ b/GPy/testing/gplvm_tests.py @@ -7,11 +7,11 @@ import GPy class GPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() @@ -19,11 +19,11 @@ class GPLVMTests(unittest.TestCase): self.assertTrue(m.checkgrad()) def test_linear_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() @@ -31,11 +31,11 @@ class GPLVMTests(unittest.TestCase): self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) m = GPy.models.GPLVM(Y, input_dim, kernel = k) m.ensure_default_constraints() diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index b48bc813..98c75827 
100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -12,7 +12,7 @@ class KernelTests(unittest.TestCase): K.constrain_fixed('2') X = np.random.rand(5,5) Y = np.ones((5,1)) - m = GPy.models.GP_regression(X,Y,K) + m = GPy.models.GPRegression(X,Y,K) self.assertTrue(m.checkgrad()) def test_fixedkernel(self): @@ -21,9 +21,9 @@ class KernelTests(unittest.TestCase): """ X = np.random.rand(30, 4) K = np.dot(X, X.T) - kernel = GPy.kern.fixed(4, K) + kernel = GPy.kern.Fixed(4, K) Y = np.ones((30,1)) - m = GPy.models.GP_regression(X,Y,kernel=kernel) + m = GPy.models.GPRegression(X,Y,kernel=kernel) self.assertTrue(m.checkgrad()) def test_coregionalisation(self): @@ -36,9 +36,9 @@ class KernelTests(unittest.TestCase): Y = np.vstack((Y1,Y2)) k1 = GPy.kern.rbf(1) + GPy.kern.bias(1) - k2 = GPy.kern.coregionalise(2,1) + k2 = GPy.kern.Coregionalise(2,1) k = k1.prod(k2,tensor=True) - m = GPy.models.GP_regression(X,Y,kernel=k) + m = GPy.models.GPRegression(X,Y,kernel=k) self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/mrd_tests.py b/GPy/testing/mrd_tests.py index 25adbca6..b0137709 100644 --- a/GPy/testing/mrd_tests.py +++ b/GPy/testing/mrd_tests.py @@ -14,16 +14,16 @@ class MRDTests(unittest.TestCase): def test_gradients(self): num_m = 3 - N, M, input_dim, D = 20, 8, 6, 20 + N, num_inducing, input_dim, D = 20, 8, 6, 20 X = np.random.rand(N, input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) K = k.K(X) - Ylist = [np.random.multivariate_normal(np.zeros(N), K, D).T for _ in range(num_m)] + Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)] likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist] - m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, M=M) + m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/prior_tests.py 
b/GPy/testing/prior_tests.py index d3269560..e0226751 100644 --- a/GPy/testing/prior_tests.py +++ b/GPy/testing/prior_tests.py @@ -13,7 +13,7 @@ class PriorTests(unittest.TestCase): y = b*X + C + 1*np.sin(X) y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] - m = GPy.models.GP_regression(X, y) + m = GPy.models.GPRegression(X, y) m.ensure_default_constraints() lognormal = GPy.priors.LogGaussian(1, 2) m.set_prior('rbf', lognormal) @@ -27,7 +27,7 @@ class PriorTests(unittest.TestCase): y = b*X + C + 1*np.sin(X) y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] - m = GPy.models.GP_regression(X, y) + m = GPy.models.GPRegression(X, y) m.ensure_default_constraints() Gamma = GPy.priors.Gamma(1, 1) m.set_prior('rbf', Gamma) @@ -41,7 +41,7 @@ class PriorTests(unittest.TestCase): y = b*X + C + 1*np.sin(X) y += 0.05*np.random.randn(len(X)) X, y = X[:, None], y[:, None] - m = GPy.models.GP_regression(X, y) + m = GPy.models.GPRegression(X, y) m.ensure_default_constraints() gaussian = GPy.priors.Gaussian(1, 1) success = False diff --git a/GPy/testing/psi_stat_expactation_tests.py b/GPy/testing/psi_stat_expactation_tests.py index 95f83fb5..da71754b 100644 --- a/GPy/testing/psi_stat_expactation_tests.py +++ b/GPy/testing/psi_stat_expactation_tests.py @@ -21,36 +21,36 @@ def ard(p): @testing.deepTest(__test__) class Test(unittest.TestCase): - D = 9 - M = 4 + input_dim = 9 + num_inducing = 4 N = 3 Nsamples = 6e6 def setUp(self): self.kerns = ( -# (GPy.kern.rbf(self.D, ARD=True) + -# GPy.kern.linear(self.D, ARD=True) + -# GPy.kern.bias(self.D) + -# GPy.kern.white(self.D)), - (GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) + - GPy.kern.rbf(self.D, np.random.rand(), np.random.rand(self.D), ARD=True) + - GPy.kern.linear(self.D, np.random.rand(self.D), ARD=True) + - GPy.kern.bias(self.D) + - GPy.kern.white(self.D)), -# GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True), -# GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, 
ARD=True), -# GPy.kern.linear(self.D) + GPy.kern.bias(self.D), -# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D), -# GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), -# GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D), -# GPy.kern.bias(self.D), GPy.kern.white(self.D), +# (GPy.kern.rbf(self.input_dim, ARD=True) + +# GPy.kern.linear(self.input_dim, ARD=True) + +# GPy.kern.bias(self.input_dim) + +# GPy.kern.white(self.input_dim)), + (GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + + GPy.kern.rbf(self.input_dim, np.random.rand(), np.random.rand(self.input_dim), ARD=True) + + GPy.kern.linear(self.input_dim, np.random.rand(self.input_dim), ARD=True) + + GPy.kern.bias(self.input_dim) + + GPy.kern.white(self.input_dim)), +# GPy.kern.rbf(self.input_dim), GPy.kern.rbf(self.input_dim, ARD=True), +# GPy.kern.linear(self.input_dim, ARD=False), GPy.kern.linear(self.input_dim, ARD=True), +# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim), +# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim), +# GPy.kern.linear(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), +# GPy.kern.rbf(self.input_dim) + GPy.kern.bias(self.input_dim) + GPy.kern.white(self.input_dim), +# GPy.kern.bias(self.input_dim), GPy.kern.white(self.input_dim), ) - self.q_x_mean = np.random.randn(self.D) - self.q_x_variance = np.exp(np.random.randn(self.D)) - self.q_x_samples = np.random.randn(self.Nsamples, self.D) * np.sqrt(self.q_x_variance) + self.q_x_mean - self.Z = np.random.randn(self.M, self.D) - self.q_x_mean.shape = (1, self.D) - self.q_x_variance.shape = (1, self.D) + self.q_x_mean = np.random.randn(self.input_dim) + self.q_x_variance = np.exp(np.random.randn(self.input_dim)) + self.q_x_samples = np.random.randn(self.Nsamples, self.input_dim) * np.sqrt(self.q_x_variance) + self.q_x_mean + self.Z = np.random.randn(self.num_inducing, self.input_dim) + 
self.q_x_mean.shape = (1, self.input_dim) + self.q_x_variance.shape = (1, self.input_dim) def test_psi0(self): for kern in self.kerns: @@ -63,7 +63,7 @@ class Test(unittest.TestCase): for kern in self.kerns: Nsamples = 100 psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance) - K_ = np.zeros((Nsamples, self.M)) + K_ = np.zeros((Nsamples, self.num_inducing)) diffs = [] for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)): K = kern.K(q_x_sample_stripe, self.Z) @@ -89,7 +89,7 @@ class Test(unittest.TestCase): for kern in self.kerns: Nsamples = 100 psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance) - K_ = np.zeros((self.M, self.M)) + K_ = np.zeros((self.num_inducing, self.num_inducing)) diffs = [] for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)): K = kern.K(q_x_sample_stripe, self.Z) diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/testing/psi_stat_gradient_tests.py index 23f841b5..c110d270 100644 --- a/GPy/testing/psi_stat_gradient_tests.py +++ b/GPy/testing/psi_stat_gradient_tests.py @@ -8,23 +8,23 @@ import numpy import GPy import itertools -from GPy.core import model +from GPy.core import Model -class PsiStatModel(model): - def __init__(self, which, X, X_variance, Z, M, kernel): +class PsiStatModel(Model): + def __init__(self, which, X, X_variance, Z, num_inducing, kernel): self.which = which self.X = X self.X_variance = X_variance self.Z = Z self.N, self.input_dim = X.shape - self.M, input_dim = Z.shape + self.num_inducing, input_dim = Z.shape assert self.input_dim == input_dim, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape) self.kern = kernel super(PsiStatModel, self).__init__() self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance) def _get_param_names(self): Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.input_dim))] - Znames = 
["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.input_dim))] + Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.num_inducing), range(self.input_dim))] return Xnames + Znames + self.kern._get_param_names() def _get_params(self): return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()]) @@ -34,7 +34,7 @@ class PsiStatModel(model): start, end = end, end + self.X_variance.size self.X_variance = x[start: end].reshape(self.N, self.input_dim) start, end = end, end + self.Z.size - self.Z = x[start: end].reshape(self.M, self.input_dim) + self.Z = x[start: end].reshape(self.num_inducing, self.input_dim) self.kern._set_params(x[end:]) def log_likelihood(self): return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum() @@ -43,19 +43,19 @@ class PsiStatModel(model): try: psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance) except AttributeError: - psiZ = numpy.zeros(self.M * self.input_dim) + psiZ = numpy.zeros(self.num_inducing * self.input_dim) thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten() return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad)) class DPsiStatTest(unittest.TestCase): input_dim = 5 N = 50 - M = 10 - D = 20 + num_inducing = 10 + input_dim = 20 X = numpy.random.randn(N, input_dim) X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) - Z = numpy.random.permutation(X)[:M] - Y = X.dot(numpy.random.randn(input_dim, D)) + Z = numpy.random.permutation(X)[:num_inducing] + Y = X.dot(numpy.random.randn(input_dim, input_dim)) # kernels = [GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim)), GPy.kern.rbf(input_dim, ARD=True), GPy.kern.bias(input_dim)] kernels = [GPy.kern.linear(input_dim), 
GPy.kern.rbf(input_dim), GPy.kern.bias(input_dim), @@ -65,42 +65,39 @@ class DPsiStatTest(unittest.TestCase): def testPsi0(self): for k in self.kernels: m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) - try: - assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) - except: - import ipdb;ipdb.set_trace() + num_inducing=self.num_inducing, kernel=k) + assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) # def testPsi1(self): # for k in self.kernels: # m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, -# M=self.M, kernel=k) +# num_inducing=self.num_inducing, kernel=k) # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_lin(self): k = self.kernels[0] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_lin_bia(self): k = self.kernels[3] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_rbf(self): k = self.kernels[1] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_rbf_bia(self): k = self.kernels[-1] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_bia(self): k = self.kernels[2] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.M, kernel=k) + num_inducing=self.num_inducing, 
kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) @@ -108,25 +105,25 @@ if __name__ == "__main__": import sys interactive = 'i' in sys.argv if interactive: -# N, M, input_dim, D = 30, 5, 4, 30 +# N, num_inducing, input_dim, input_dim = 30, 5, 4, 30 # X = numpy.random.rand(N, input_dim) # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # K = k.K(X) -# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T +# Y = numpy.random.multivariate_normal(numpy.zeros(N), K, input_dim).T # Y -= Y.mean(axis=0) # k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) -# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, M=M) +# m = GPy.models.Bayesian_GPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) # m.ensure_default_constraints() # m.randomize() # # self.assertTrue(m.checkgrad()) numpy.random.seed(0) input_dim = 5 N = 50 - M = 10 + num_inducing = 10 D = 15 X = numpy.random.randn(N, input_dim) X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) - Z = numpy.random.permutation(X)[:M] + Z = numpy.random.permutation(X)[:num_inducing] Y = X.dot(numpy.random.randn(input_dim, D)) # kernel = GPy.kern.bias(input_dim) # @@ -136,22 +133,22 @@ if __name__ == "__main__": # for k in kernels: # m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=k) +# num_inducing=num_inducing, kernel=k) # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) # # m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.linear(input_dim)) +# num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim)) # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=kernel) +# num_inducing=num_inducing, kernel=kernel) # m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=kernel) +# num_inducing=num_inducing, kernel=kernel) # m2 = 
PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.rbf(input_dim)) +# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)) m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, - M=M, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) + num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) m3.ensure_default_constraints() # + GPy.kern.bias(input_dim)) # m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, -# M=M, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)) +# num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim)) else: unittest.main() diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py index a790ce54..e7f714b4 100644 --- a/GPy/testing/sparse_gplvm_tests.py +++ b/GPy/testing/sparse_gplvm_tests.py @@ -4,41 +4,42 @@ import unittest import numpy as np import GPy +from GPy.models.sparse_gplvm import SparseGPLVM class sparse_GPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @unittest.skip('linear kernels do not have dKdiag_dX') def test_linear_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = 
np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T + Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = GPy.models.sparse_GPLVM(Y, input_dim, kernel = k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index 8224c7a8..7ee9ef40 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -5,33 +5,33 @@ import unittest import numpy as np import GPy +from GPy.likelihoods.likelihood_functions import Binomial class GradientTests(unittest.TestCase): def setUp(self): ###################################### - ## 1 dimensional example + # # 1 dimensional example # sample inputs and outputs - self.X1D = np.random.uniform(-3.,3.,(20,1)) - self.Y1D = np.sin(self.X1D)+np.random.randn(20,1)*0.05 + self.X1D = np.random.uniform(-3., 3., (20, 1)) + self.Y1D = np.sin(self.X1D) + np.random.randn(20, 1) * 0.05 ###################################### - ## 2 dimensional example + # # 2 dimensional example # sample inputs and outputs - self.X2D = np.random.uniform(-3.,3.,(40,2)) - self.Y2D = np.sin(self.X2D[:,0:1]) * np.sin(self.X2D[:,1:2])+np.random.randn(40,1)*0.05 + self.X2D = np.random.uniform(-3., 3., (40, 2)) + self.Y2D = np.sin(self.X2D[:, 0:1]) * np.sin(self.X2D[:, 1:2]) + 
np.random.randn(40, 1) * 0.05 - def check_model_with_white(self, kern, model_type='GP_regression', dimension=1): - #Get the correct gradients + def check_model_with_white(self, kern, model_type='GPRegression', dimension=1): + # Get the correct gradients if dimension == 1: X = self.X1D Y = self.Y1D else: X = self.X2D Y = self.Y2D - - #Get model type (GP_regression, GP_sparse_regression, etc) + # Get model type (GPRegression, SparseGPRegression, etc) model_fit = getattr(GPy.models, model_type) noise = GPy.kern.white(dimension) @@ -42,114 +42,114 @@ class GradientTests(unittest.TestCase): # contrain all parameters to be positive self.assertTrue(m.checkgrad()) - def test_gp_regression_rbf_1d(self): + def test_GPRegression_rbf_1d(self): ''' Testing the GP regression with rbf kernel with white kernel on 1d data ''' rbf = GPy.kern.rbf(1) - self.check_model_with_white(rbf, model_type='GP_regression', dimension=1) + self.check_model_with_white(rbf, model_type='GPRegression', dimension=1) - def test_GP_regression_rbf_2D(self): + def test_GPRegression_rbf_2D(self): ''' Testing the GP regression with rbf and white kernel on 2d data ''' rbf = GPy.kern.rbf(2) - self.check_model_with_white(rbf, model_type='GP_regression', dimension=2) + self.check_model_with_white(rbf, model_type='GPRegression', dimension=2) - def test_GP_regression_rbf_ARD_2D(self): + def test_GPRegression_rbf_ARD_2D(self): ''' Testing the GP regression with rbf and white kernel on 2d data ''' - k = GPy.kern.rbf(2,ARD=True) - self.check_model_with_white(k, model_type='GP_regression', dimension=2) + k = GPy.kern.rbf(2, ARD=True) + self.check_model_with_white(k, model_type='GPRegression', dimension=2) - def test_GP_regression_matern52_1D(self): + def test_GPRegression_matern52_1D(self): ''' Testing the GP regression with matern52 kernel on 1d data ''' matern52 = GPy.kern.Matern52(1) - self.check_model_with_white(matern52, model_type='GP_regression', dimension=1) + self.check_model_with_white(matern52, 
model_type='GPRegression', dimension=1) - def test_GP_regression_matern52_2D(self): + def test_GPRegression_matern52_2D(self): ''' Testing the GP regression with matern52 kernel on 2d data ''' matern52 = GPy.kern.Matern52(2) - self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) + self.check_model_with_white(matern52, model_type='GPRegression', dimension=2) - def test_GP_regression_matern52_ARD_2D(self): + def test_GPRegression_matern52_ARD_2D(self): ''' Testing the GP regression with matern52 kernel on 2d data ''' - matern52 = GPy.kern.Matern52(2,ARD=True) - self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) + matern52 = GPy.kern.Matern52(2, ARD=True) + self.check_model_with_white(matern52, model_type='GPRegression', dimension=2) - def test_GP_regression_matern32_1D(self): + def test_GPRegression_matern32_1D(self): ''' Testing the GP regression with matern32 kernel on 1d data ''' matern32 = GPy.kern.Matern32(1) - self.check_model_with_white(matern32, model_type='GP_regression', dimension=1) + self.check_model_with_white(matern32, model_type='GPRegression', dimension=1) - def test_GP_regression_matern32_2D(self): + def test_GPRegression_matern32_2D(self): ''' Testing the GP regression with matern32 kernel on 2d data ''' matern32 = GPy.kern.Matern32(2) - self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) + self.check_model_with_white(matern32, model_type='GPRegression', dimension=2) - def test_GP_regression_matern32_ARD_2D(self): + def test_GPRegression_matern32_ARD_2D(self): ''' Testing the GP regression with matern32 kernel on 2d data ''' - matern32 = GPy.kern.Matern32(2,ARD=True) - self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) + matern32 = GPy.kern.Matern32(2, ARD=True) + self.check_model_with_white(matern32, model_type='GPRegression', dimension=2) - def test_GP_regression_exponential_1D(self): + def test_GPRegression_exponential_1D(self): ''' 
Testing the GP regression with exponential kernel on 1d data ''' exponential = GPy.kern.exponential(1) - self.check_model_with_white(exponential, model_type='GP_regression', dimension=1) + self.check_model_with_white(exponential, model_type='GPRegression', dimension=1) - def test_GP_regression_exponential_2D(self): + def test_GPRegression_exponential_2D(self): ''' Testing the GP regression with exponential kernel on 2d data ''' exponential = GPy.kern.exponential(2) - self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) + self.check_model_with_white(exponential, model_type='GPRegression', dimension=2) - def test_GP_regression_exponential_ARD_2D(self): + def test_GPRegression_exponential_ARD_2D(self): ''' Testing the GP regression with exponential kernel on 2d data ''' - exponential = GPy.kern.exponential(2,ARD=True) - self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) + exponential = GPy.kern.exponential(2, ARD=True) + self.check_model_with_white(exponential, model_type='GPRegression', dimension=2) - def test_GP_regression_bias_kern_1D(self): + def test_GPRegression_bias_kern_1D(self): ''' Testing the GP regression with bias kernel on 1d data ''' bias = GPy.kern.bias(1) - self.check_model_with_white(bias, model_type='GP_regression', dimension=1) + self.check_model_with_white(bias, model_type='GPRegression', dimension=1) - def test_GP_regression_bias_kern_2D(self): + def test_GPRegression_bias_kern_2D(self): ''' Testing the GP regression with bias kernel on 2d data ''' bias = GPy.kern.bias(2) - self.check_model_with_white(bias, model_type='GP_regression', dimension=2) + self.check_model_with_white(bias, model_type='GPRegression', dimension=2) - def test_GP_regression_linear_kern_1D_ARD(self): + def test_GPRegression_linear_kern_1D_ARD(self): ''' Testing the GP regression with linear kernel on 1d data ''' - linear = GPy.kern.linear(1,ARD=True) - self.check_model_with_white(linear, model_type='GP_regression', 
dimension=1) + linear = GPy.kern.linear(1, ARD=True) + self.check_model_with_white(linear, model_type='GPRegression', dimension=1) - def test_GP_regression_linear_kern_2D_ARD(self): + def test_GPRegression_linear_kern_2D_ARD(self): ''' Testing the GP regression with linear kernel on 2d data ''' - linear = GPy.kern.linear(2,ARD=True) - self.check_model_with_white(linear, model_type='GP_regression', dimension=2) + linear = GPy.kern.linear(2, ARD=True) + self.check_model_with_white(linear, model_type='GPRegression', dimension=2) - def test_GP_regression_linear_kern_1D(self): + def test_GPRegression_linear_kern_1D(self): ''' Testing the GP regression with linear kernel on 1d data ''' linear = GPy.kern.linear(1) - self.check_model_with_white(linear, model_type='GP_regression', dimension=1) + self.check_model_with_white(linear, model_type='GPRegression', dimension=1) - def test_GP_regression_linear_kern_2D(self): + def test_GPRegression_linear_kern_2D(self): ''' Testing the GP regression with linear kernel on 2d data ''' linear = GPy.kern.linear(2) - self.check_model_with_white(linear, model_type='GP_regression', dimension=2) + self.check_model_with_white(linear, model_type='GPRegression', dimension=2) - def test_sparse_GP_regression_rbf_white_kern_1d(self): + def test_SparseGPRegression_rbf_white_kern_1d(self): ''' Testing the sparse GP regression with rbf kernel with white kernel on 1d data ''' rbf = GPy.kern.rbf(1) - self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=1) + self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=1) - def test_sparse_GP_regression_rbf_white_kern_2D(self): + def test_SparseGPRegression_rbf_white_kern_2D(self): ''' Testing the sparse GP regression with rbf and white kernel on 2d data ''' rbf = GPy.kern.rbf(2) - self.check_model_with_white(rbf, model_type='sparse_GP_regression', dimension=2) + self.check_model_with_white(rbf, model_type='SparseGPRegression', dimension=2) def 
test_GPLVM_rbf_bias_white_kern_2D(self): """ Testing GPLVM with rbf + bias and white kernel """ N, input_dim, D = 50, 1, 2 X = np.random.rand(N, input_dim) - k = GPy.kern.rbf(input_dim, 0.5, 0.9*np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) + k = GPy.kern.rbf(input_dim, 0.5, 0.9 * np.ones((1,))) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T - m = GPy.models.GPLVM(Y, input_dim, kernel = k) + Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T + m = GPy.models.GPLVM(Y, input_dim, kernel=k) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) @@ -159,43 +159,46 @@ class GradientTests(unittest.TestCase): X = np.random.rand(N, input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim, 0.1) + GPy.kern.white(input_dim, 0.05) K = k.K(X) - Y = np.random.multivariate_normal(np.zeros(N),K,D).T - m = GPy.models.GPLVM(Y, input_dim, init = 'PCA', kernel = k) + Y = np.random.multivariate_normal(np.zeros(N), K, input_dim).T + m = GPy.models.GPLVM(Y, input_dim, init='PCA', kernel=k) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) def test_GP_EP_probit(self): N = 20 - X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] - Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] + X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] + Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] kernel = GPy.kern.rbf(1) - distribution = GPy.likelihoods.likelihood_functions.binomial() + distribution = GPy.likelihoods.likelihood_functions.Binomial() likelihood = GPy.likelihoods.EP(Y, distribution) m = GPy.core.GP(X, likelihood, kernel) m.ensure_default_constraints() m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) - #self.assertTrue(m.EPEM) + # self.assertTrue(m.EPEM) def test_sparse_EP_DTC_probit(self): N = 20 - X = 
np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] - Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None] - Z = np.linspace(0,15,4)[:,None] + X = np.hstack([np.random.normal(5, 2, N / 2), np.random.normal(10, 2, N / 2)])[:, None] + Y = np.hstack([np.ones(N / 2), np.zeros(N / 2)])[:, None] + Z = np.linspace(0, 15, 4)[:, None] kernel = GPy.kern.rbf(1) - distribution = GPy.likelihoods.likelihood_functions.binomial() + distribution = GPy.likelihoods.likelihood_functions.Binomial() likelihood = GPy.likelihoods.EP(Y, distribution) - m = GPy.core.sparse_GP(X, likelihood, kernel,Z) + m = GPy.core.SparseGP(X, likelihood, kernel, Z) m.ensure_default_constraints() m.update_likelihood_approximation() self.assertTrue(m.checkgrad()) def test_generalized_FITC(self): N = 20 - X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] + X = np.hstack([np.random.rand(N / 2) + 1, np.random.rand(N / 2) - 1])[:, None] k = GPy.kern.rbf(1) + GPy.kern.white(1) Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] - likelihood = GPy.inference.likelihoods.binomial(Y) + + distribution = GPy.likelihoods.likelihood_functions.Binomial() + likelihood = GPy.likelihoods.EP(Y, distribution) + #likelihood = GPy.inference.likelihoods.Binomial(Y) m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4) m.constrain_positive('(var|len)') m.approximate_likelihood() diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py index 36fa4f61..c477f283 100644 --- a/GPy/util/datasets.py +++ b/GPy/util/datasets.py @@ -22,7 +22,7 @@ def fetch_dataset(resource, save_name = None, save_file = True, messages = True) print "Downloading resource: " , resource, " ... ", response = url.urlopen(resource) # TODO: Some error checking... - # ... + # ... html = response.read() response.close() if save_file: @@ -33,8 +33,6 @@ def fetch_dataset(resource, save_name = None, save_file = True, messages = True) if messages: print "Done!" 
return html - - def della_gatta_TRP63_gene_expression(gene_number=None): mat_data = scipy.io.loadmat(os.path.join(data_path, 'DellaGattadata.mat')) diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py index 6e7d26d8..c04fc460 100644 --- a/GPy/util/linalg.py +++ b/GPy/util/linalg.py @@ -1,87 +1,80 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -#tdot function courtesy of Ian Murray: +# tdot function courtesy of Ian Murray: # Iain Murray, April 2013. iain contactable via iainmurray.net # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py import numpy as np -from scipy import linalg, optimize, weave -import pylab as pb -import Tango -import sys -import re -import pdb -import cPickle +from scipy import linalg, weave import types import ctypes from ctypes import byref, c_char, c_int, c_double # TODO -#import scipy.lib.lapack -import scipy as sp +# import scipy.lib.lapack try: - _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) + _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) # @UndefinedVariable _blas_available = True except: _blas_available = False -def trace_dot(a,b): +def trace_dot(a, b): """ efficiently compute the trace of the matrix product of a and b """ - return np.sum(a*b) + return np.sum(a * b) def mdot(*args): - """Multiply all the arguments using matrix product rules. - The output is equivalent to multiplying the arguments one by one - from left to right using dot(). - Precedence can be controlled by creating tuples of arguments, - for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)). - Note that this means the output of dot(a,b) and mdot(a,b) will differ if - a or b is a pure tuple of numbers. - """ - if len(args)==1: - return args[0] - elif len(args)==2: - return _mdot_r(args[0],args[1]) - else: - return _mdot_r(args[:-1],args[-1]) + """Multiply all the arguments using matrix product rules. 
+ The output is equivalent to multiplying the arguments one by one + from left to right using dot(). + Precedence can be controlled by creating tuples of arguments, + for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)). + Note that this means the output of dot(a,b) and mdot(a,b) will differ if + a or b is a pure tuple of numbers. + """ + if len(args) == 1: + return args[0] + elif len(args) == 2: + return _mdot_r(args[0], args[1]) + else: + return _mdot_r(args[:-1], args[-1]) -def _mdot_r(a,b): - """Recursive helper for mdot""" - if type(a)==types.TupleType: - if len(a)>1: - a = mdot(*a) - else: - a = a[0] - if type(b)==types.TupleType: - if len(b)>1: - b = mdot(*b) - else: - b = b[0] - return np.dot(a,b) +def _mdot_r(a, b): + """Recursive helper for mdot""" + if type(a) == types.TupleType: + if len(a) > 1: + a = mdot(*a) + else: + a = a[0] + if type(b) == types.TupleType: + if len(b) > 1: + b = mdot(*b) + else: + b = b[0] + return np.dot(a, b) -def jitchol(A,maxtries=5): +def jitchol(A, maxtries=5): A = np.asfortranarray(A) - L,info = linalg.lapack.flapack.dpotrf(A,lower=1) - if info ==0: + L, info = linalg.lapack.flapack.dpotrf(A, lower=1) + if info == 0: return L else: diagA = np.diag(A) - if np.any(diagA<0.): + if np.any(diagA < 0.): raise linalg.LinAlgError, "not pd: negative diagonal elements" - jitter= diagA.mean()*1e-6 - for i in range(1,maxtries+1): + jitter = diagA.mean() * 1e-6 + for i in range(1, maxtries + 1): print 'Warning: adding jitter of {:.10e}'.format(jitter) try: - return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True) + return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True) except: jitter *= 10 - raise linalg.LinAlgError,"not positive definite, even with jitter." + raise linalg.LinAlgError, "not positive definite, even with jitter." 
-def jitchol_old(A,maxtries=5): +def jitchol_old(A, maxtries=5): """ :param A : An almost pd square matrix @@ -93,20 +86,20 @@ def jitchol_old(A,maxtries=5): np.allclose(sp.linalg.cholesky(XXT, lower = True), np.triu(sp.linalg.cho_factor(XXT)[0]).T) """ try: - return linalg.cholesky(A, lower = True) + return linalg.cholesky(A, lower=True) except linalg.LinAlgError: diagA = np.diag(A) - if np.any(diagA<0.): + if np.any(diagA < 0.): raise linalg.LinAlgError, "not pd: negative diagonal elements" - jitter= diagA.mean()*1e-6 - for i in range(1,maxtries+1): + jitter = diagA.mean() * 1e-6 + for i in range(1, maxtries + 1): print '\rWarning: adding jitter of {:.10e} '.format(jitter), try: - return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True) + return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True) except: jitter *= 10 - raise linalg.LinAlgError,"not positive definite, even with jitter." + raise linalg.LinAlgError, "not positive definite, even with jitter." def pdinv(A, *args): """ @@ -125,7 +118,7 @@ def pdinv(A, *args): logdet = 2.*np.sum(np.log(np.diag(L))) Li = chol_inv(L) Ai, _ = linalg.lapack.flapack.dpotri(L) - #Ai = np.tril(Ai) + np.tril(Ai,-1).T + # Ai = np.tril(Ai) + np.tril(Ai,-1).T symmetrify(Ai) return Ai, L, Li, logdet @@ -140,7 +133,7 @@ def chol_inv(L): """ - return linalg.lapack.flapack.dtrtri(L, lower = True)[0] + return linalg.lapack.flapack.dtrtri(L, lower=True)[0] def multiple_pdinv(A): @@ -155,11 +148,11 @@ def multiple_pdinv(A): hld: 0.5* the log of the determinants of A """ N = A.shape[-1] - chols = [jitchol(A[:,:,i]) for i in range(N)] + chols = [jitchol(A[:, :, i]) for i in range(N)] halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols] - invs = [linalg.lapack.flapack.dpotri(L[0],True)[0] for L in chols] - invs = [np.triu(I)+np.triu(I,1).T for I in invs] - return np.dstack(invs),np.array(halflogdets) + invs = [linalg.lapack.flapack.dpotri(L[0], True)[0] for L in chols] + invs = [np.triu(I) + np.triu(I, 1).T for 
I in invs] + return np.dstack(invs), np.array(halflogdets) def PCA(Y, input_dim): @@ -179,18 +172,18 @@ def PCA(Y, input_dim): if not np.allclose(Y.mean(axis=0), 0.0): print "Y is not zero mean, centering it locally (GPy.util.linalg.PCA)" - #Y -= Y.mean(axis=0) + # Y -= Y.mean(axis=0) - Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False) - [X, W] = [Z[0][:,0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:,0:input_dim]] + Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False) + [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]] v = X.std(axis=0) X /= v; W *= v; return X, W.T -def tdot_numpy(mat,out=None): - return np.dot(mat,mat.T,out) +def tdot_numpy(mat, out=None): + return np.dot(mat, mat.T, out) def tdot_blas(mat, out=None): """returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles.""" @@ -198,16 +191,16 @@ def tdot_blas(mat, out=None): return np.dot(mat, mat.T) nn = mat.shape[0] if out is None: - out = np.zeros((nn,nn)) + out = np.zeros((nn, nn)) else: assert(out.dtype == 'float64') - assert(out.shape == (nn,nn)) + assert(out.shape == (nn, nn)) # FIXME: should allow non-contiguous out, and copy output into it: assert(8 in out.strides) # zeroing needed because of dumb way I copy across triangular answer out[:] = 0.0 - ## Call to DSYRK from BLAS + # # Call to DSYRK from BLAS # If already in Fortran order (rare), and has the right sorts of strides I # could avoid the copy. I also thought swapping to cblas API would allow use # of C order. 
However, I tried that and had errors with large matrices: @@ -226,17 +219,17 @@ def tdot_blas(mat, out=None): _blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K), byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC)) - symmetrify(out,upper=True) + symmetrify(out, upper=True) return out def tdot(*args, **kwargs): if _blas_available: - return tdot_blas(*args,**kwargs) + return tdot_blas(*args, **kwargs) else: - return tdot_numpy(*args,**kwargs) + return tdot_numpy(*args, **kwargs) -def DSYR_blas(A,x,alpha=1.): +def DSYR_blas(A, x, alpha=1.): """ Performs a symmetric rank-1 update operation: A <- A + alpha * np.dot(x,x.T) @@ -256,9 +249,9 @@ def DSYR_blas(A,x,alpha=1.): INCX = c_int(1) _blaslib.dsyr_(byref(UPLO), byref(N), byref(ALPHA), x_, byref(INCX), A_, byref(LDA)) - symmetrify(A,upper=True) + symmetrify(A, upper=True) -def DSYR_numpy(A,x,alpha=1.): +def DSYR_numpy(A, x, alpha=1.): """ Performs a symmetric rank-1 update operation: A <- A + alpha * np.dot(x,x.T) @@ -269,23 +262,23 @@ def DSYR_numpy(A,x,alpha=1.): :param x: Nx1 np.array :param alpha: scalar """ - A += alpha*np.dot(x[:,None],x[None,:]) + A += alpha * np.dot(x[:, None], x[None, :]) def DSYR(*args, **kwargs): if _blas_available: - return DSYR_blas(*args,**kwargs) + return DSYR_blas(*args, **kwargs) else: - return DSYR_numpy(*args,**kwargs) + return DSYR_numpy(*args, **kwargs) -def symmetrify(A,upper=False): +def symmetrify(A, upper=False): """ Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper works IN PLACE. """ - N,M = A.shape - assert N==M + N, M = A.shape + assert N == M c_contig_code = """ int iN; for (int i=1; i """ - code=""" + code = """ double r,c,s; int j,i; for(j=0; j