diff --git a/GPy/core/fitc.py b/GPy/core/fitc.py index 639e0561..604db5e8 100644 --- a/GPy/core/fitc.py +++ b/GPy/core/fitc.py @@ -14,7 +14,7 @@ class FITC(SparseGP): sparse FITC approximation :param X: inputs - :type X: np.ndarray (N x Q) + :type X: np.ndarray (num_data x Q) :param likelihood: a likelihood instance, containing the observed data :type likelihood: GPy.likelihood.(Gaussian | EP) :param kernel : the kernel (covariance function). See link kernels @@ -57,7 +57,7 @@ class FITC(SparseGP): self.V_star = self.beta_star * self.likelihood.Y # The rather complex computations of self.A - tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.N))) + tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data))) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) self.A = tdot(tmp) @@ -113,7 +113,7 @@ class FITC(SparseGP): self._dpsi1_dX_jkj = 0 self._dpsi1_dtheta_jkj = 0 - for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.N),self.V_star,alpha,gamma_2,gamma_3): + for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3): K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T) _dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:] _dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm @@ -137,14 +137,14 @@ class FITC(SparseGP): aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1) aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V) - dA_dnoise = 0.5 * self.D * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.D * np.sum(self.likelihood.Y**2 * dbstar_dnoise) + dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise) dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T) dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T) alpha = mdot(LBiLmipsi1,self.V_star) alpha_ = mdot(LBiLmipsi1.T,alpha) - dD_dnoise_2 = -0.5 * self.D * np.sum(alpha_**2 * dbstar_dnoise ) + dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_**2 * dbstar_dnoise ) dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y) dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star) @@ -154,7 +154,7 @@ class FITC(SparseGP): def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ - A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) + A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y) C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) return A + C + D @@ -204,8 +204,8 @@ class FITC(SparseGP): # q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T) # Ci = I + (RPT0)Di(RPT0).T - # C = I - [RPT0] * (D+[RPT0].T*[RPT0])^-1*[RPT0].T - # = I - [RPT0] * (D + self.Qnn)^-1 * [RPT0].T + # C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T + # = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T # = I - V.T * V U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 7b6f46b0..246b8cc9 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -33,8 +33,8 @@ class GP(GPBase): self._set_params(self._get_params()) def _set_params(self, p): - self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()]) - self.likelihood._set_params(p[self.kern.Nparam_transformed():]) + self.kern._set_params_transformed(p[:self.kern.num_params_transformed()]) + self.likelihood._set_params(p[self.kern.num_params_transformed():]) self.K = self.kern.K(self.X) self.K += self.likelihood.covariance_matrix @@ -46,12 +46,12 @@ class GP(GPBase): #alpha = np.dot(self.Ki, self.likelihood.Y) alpha,_ = linalg.lapack.flapack.dpotrs(self.L, self.likelihood.Y,lower=1) - self.dL_dK = 0.5 * (tdot(alpha) - self.input_dim * self.Ki) + self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki) else: #tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1) tmp, _ = linalg.lapack.flapack.dpotrs(self.L, np.asfortranarray(tmp.T), lower=1) - self.dL_dK = 0.5 * (tmp - self.input_dim * self.Ki) + self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki) def _get_params(self): return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params())) diff --git a/GPy/core/gp_base.py b/GPy/core/gp_base.py index 11b4726b..9188fe6f 100644 --- a/GPy/core/gp_base.py +++ b/GPy/core/gp_base.py @@ -1,24 +1,24 @@ import numpy as np -import model from .. import kern from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D import pylab as pb +from GPy.core.model import Model -class GPBase(model.model): +class GPBase(Model): """ - Gaussian Process model for holding shared behaviour between + Gaussian Process Model for holding shared behaviour between sprase_GP and GP models """ def __init__(self, X, likelihood, kernel, normalize_X=False): self.X = X assert len(self.X.shape) == 2 - self.N, self.input_dim = self.X.shape + self.num_data, self.input_dim = self.X.shape assert isinstance(kernel, kern.kern) self.kern = kernel self.likelihood = likelihood assert self.X.shape[0] == self.likelihood.data.shape[0] - self.N, self.output_dim = self.likelihood.data.shape + self.num_data, self.output_dim = self.likelihood.data.shape if normalize_X: self._Xmean = X.mean(0)[None, :] @@ -28,7 +28,7 @@ class GPBase(model.model): self._Xmean = np.zeros((1, self.input_dim)) self._Xstd = np.ones((1, self.input_dim)) - model.model.__init__(self) + Model.__init__(self) # All leaf nodes should call self._set_params(self._get_params()) at # the end @@ -84,8 +84,8 @@ class GPBase(model.model): Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution) m, v = self._raw_predict(Xnew, which_parts=which_parts) m = m.reshape(resolution, resolution).T - ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) - ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) + ax.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable + ax.scatter(self.X[:, 0], self.X[:, 1], 40, self.likelihood.Y, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max()) # @UndefinedVariable ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) else: @@ -94,9 +94,9 @@ class GPBase(model.model): def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None): """ TODO: Docstrings! + :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure - """ # TODO include samples if which_data == 'all': @@ -111,7 +111,7 @@ class GPBase(model.model): Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits) - m, var, lower, upper = self.predict(Xnew, which_parts=which_parts) + m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) for d in range(m.shape[1]): gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax) ax.plot(Xu[which_data], self.likelihood.data[which_data, d], 'kx', mew=1.5) @@ -122,13 +122,13 @@ class GPBase(model.model): elif self.X.shape[1] == 2: # FIXME resolution = resolution or 50 - Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) + Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution) x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution) - m, var, lower, upper = self.predict(Xnew, which_parts=which_parts) + m, _, lower, upper = self.predict(Xnew, which_parts=which_parts) m = m.reshape(resolution, resolution).T - ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) + ax.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable Yf = self.likelihood.Y.flatten() - ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) + ax.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.) # @UndefinedVariable ax.set_xlim(xmin[0], xmax[0]) ax.set_ylim(xmin[1], xmax[1]) diff --git a/GPy/core/model.py b/GPy/core/model.py index 7cc21080..582d7313 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -6,37 +6,32 @@ from .. import likelihoods from ..inference import optimization from ..util.linalg import jitchol from GPy.util.misc import opt_wrapper -from parameterised import parameterised -from scipy import optimize +from parameterised import Parameterised import multiprocessing as mp import numpy as np -import priors -import re -import sys -import pdb from GPy.core.domains import POSITIVE, REAL # import numdifftools as ndt -class model(parameterised): +class Model(Parameterised): def __init__(self): - parameterised.__init__(self) + Parameterised.__init__(self) self.priors = None self.optimization_runs = [] self.sampling_runs = [] self.preferred_optimizer = 'scg' - #self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes + # self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes def _get_params(self): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError, "this needs to be implemented to use the Model class" def _set_params(self, x): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError, "this needs to be implemented to use the Model class" def log_likelihood(self): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError, "this needs to be implemented to use the Model class" def _log_likelihood_gradients(self): - raise NotImplementedError, "this needs to be implemented to use the model class" + raise NotImplementedError, "this needs to be implemented to use the Model class" def set_prior(self, regexp, what): """ - Sets priors on the model parameters. + Sets priors on the Model parameters. Arguments --------- @@ -65,7 +60,7 @@ class model(parameterised): if len(tie_matches) > 1: raise ValueError, "cannot place Prior across multiple ties" elif len(tie_matches) == 1: - which = which[:1] # just place a Prior object on the first parameter + which = which[:1] # just place a Prior object on the first parameter # check constraints are okay @@ -95,7 +90,7 @@ class model(parameterised): def get_gradient(self, name, return_names=False): """ - Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. + Get Model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned. """ matches = self.grep_param_names(name) if len(matches): @@ -135,7 +130,7 @@ class model(parameterised): def randomize(self): """ - Randomize the model. + Randomize the Model. Make this draw from the Prior if one exists, else draw from N(0,1) """ # first take care of all parameters (from N(0,1)) @@ -147,16 +142,16 @@ class model(parameterised): if self.priors is not None: [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None] self._set_params(x) - self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...) + self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...) - def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): + def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): """ - Perform random restarts of the model, and set the model to the best + Perform random restarts of the Model, and set the Model to the best seen solution. If the robust flag is set, exceptions raised during optimizations will - be handled silently. If _all_ runs fail, the model is reset to the + be handled silently. If _all_ runs fail, the Model is reset to the existing parameter values. Notes @@ -179,19 +174,19 @@ class model(parameterised): try: jobs = [] pool = mp.Pool(processes=num_processes) - for i in range(Nrestarts): + for i in range(num_restarts): self.randomize() job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs) jobs.append(job) - pool.close() # signal that no more data coming in - pool.join() # wait for all the tasks to complete + pool.close() # signal that no more data coming in + pool.join() # wait for all the tasks to complete except KeyboardInterrupt: print "Ctrl+c received, terminating and joining pool." pool.terminate() pool.join() - for i in range(Nrestarts): + for i in range(num_restarts): try: if not parallel: self.randomize() @@ -200,10 +195,10 @@ class model(parameterised): self.optimization_runs.append(jobs[i].get()) if verbose: - print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt)) + print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt)) except Exception as e: if robust: - print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts)) + print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts)) else: raise e @@ -218,11 +213,11 @@ class model(parameterised): Ensure that any variables which should clearly be positive have been constrained somehow. """ positive_strings = ['variance', 'lengthscale', 'precision', 'kappa'] - param_names = self._get_param_names() + # param_names = self._get_param_names() currently_constrained = self.all_constrained_indices() to_make_positive = [] for s in positive_strings: - for i in self.grep_param_names(".*"+s): + for i in self.grep_param_names(".*" + s): if not (i in currently_constrained): to_make_positive.append(i) if len(to_make_positive): @@ -240,18 +235,18 @@ class model(parameterised): Gets the gradients from the likelihood and the priors. """ self._set_params_transformed(x) - obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) return obj_grads def objective_and_gradients(self, x): self._set_params_transformed(x) obj_f = -self.log_likelihood() - self.log_prior() - obj_grads = - self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) + obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) return obj_f, obj_grads def optimize(self, optimizer=None, start=None, **kwargs): """ - Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. + Optimize the Model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. kwargs are passed to the optimizer. They can be: :max_f_eval: maximum number of function evaluations @@ -274,7 +269,7 @@ class model(parameterised): def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs): # assert self.Y.shape[1] > 1, "SGD only works with D > 1" - sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) + sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable sgd.run() self.optimization_runs.append(sgd) @@ -291,7 +286,7 @@ class model(parameterised): def f(x): self._set_params(x) return self.log_likelihood() - h = ndt.Hessian(f) + h = ndt.Hessian(f) # @UndefinedVariable A = -h(x) self._set_params(x) # check for almost zero components on the diagonal which screw up the cholesky @@ -300,7 +295,7 @@ class model(parameterised): return A def Laplace_evidence(self): - """Returns an estiamte of the model evidence based on the Laplace approximation. + """Returns an estiamte of the Model evidence based on the Laplace approximation. Uses a numerical estimate of the hessian if none is available analytically""" A = self.Laplace_covariance() try: @@ -310,12 +305,12 @@ class model(parameterised): return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld def __str__(self): - s = parameterised.__str__(self).split('\n') + s = Parameterised.__str__(self).split('\n') # add priors to the string if self.priors is not None: strs = [str(p) if p is not None else '' for p in self.priors] else: - strs = ['']*len(self._get_params()) + strs = [''] * len(self._get_params()) width = np.array(max([len(p) for p in strs] + [5])) + 4 log_like = self.log_likelihood() @@ -336,7 +331,7 @@ class model(parameterised): def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3): """ - Check the gradient of the model by comparing to a numerical estimate. + Check the gradient of the Model by comparing to a numerical estimate. If the verbose flag is passed, invividual components are tested (and printed) :param verbose: If True, print a "full" checking of each parameter @@ -389,7 +384,7 @@ class model(parameterised): param_list = range(len(x)) else: param_list = self.grep_param_names(target_param, transformed=True, search=True) - if not param_list: + if not np.any(param_list): print "No free parameters to check" return @@ -419,15 +414,15 @@ class model(parameterised): def input_sensitivity(self): """ - return an array describing the sesitivity of the model to each input + return an array describing the sesitivity of the Model to each input NB. Right now, we're basing this on the lengthscales (or variances) of the kernel. TODO: proper sensitivity analysis - where we integrate across the model inputs and evaluate the - effect on the variance of the model output. """ + where we integrate across the Model inputs and evaluate the + effect on the variance of the Model output. """ if not hasattr(self, 'kern'): - raise ValueError, "this model has no kernel" + raise ValueError, "this Model has no kernel" k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']] if (not len(k) == 1) or (not k[0].ARD): @@ -474,8 +469,8 @@ class model(parameterised): ll_change = new_ll - last_ll if ll_change < 0: - self.likelihood = last_approximation # restore previous likelihood approximation - self._set_params(last_params) # restore model parameters + self.likelihood = last_approximation # restore previous likelihood approximation + self._set_params(last_params) # restore Model parameters print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change stop = True else: diff --git a/GPy/core/parameterised.py b/GPy/core/parameterised.py index e091e820..b3a5712a 100644 --- a/GPy/core/parameterised.py +++ b/GPy/core/parameterised.py @@ -6,12 +6,10 @@ import numpy as np import re import copy import cPickle -import os -from ..util.squashers import sigmoid import warnings import transformations -class parameterised(object): +class Parameterised(object): def __init__(self): """ This is the base class for model and kernel. Mostly just handles tieing and constraining of parameters @@ -36,7 +34,7 @@ class parameterised(object): """ Returns a **copy** of parameters in non transformed space - :see_also: :py:func:`GPy.core.parameterised.params_transformed` + :see_also: :py:func:`GPy.core.Parameterised.params_transformed` """ return self._get_params() @@ -49,7 +47,7 @@ class parameterised(object): """ Returns a **copy** of parameters in transformed space - :see_also: :py:func:`GPy.core.parameterised.params` + :see_also: :py:func:`GPy.core.Parameterised.params` """ return self._get_params_transformed() @@ -113,7 +111,7 @@ class parameterised(object): if hasattr(self, 'prior'): pass - self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value + self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value def untie_everything(self): """Unties all parameters by setting tied_indices to an empty list.""" @@ -145,7 +143,7 @@ class parameterised(object): else: return np.nonzero([regexp.match(name) for name in names])[0] - def Nparam_transformed(self): + def num_params_transformed(self): removed = 0 for tie in self.tied_indices: removed += tie.size - 1 @@ -159,18 +157,18 @@ class parameterised(object): """Unconstrain matching parameters. does not untie parameters""" matches = self.grep_param_names(regexp) - #tranformed contraints: + # tranformed contraints: for match in matches: - self.constrained_indices = [i[i<>match] for i in self.constrained_indices] + self.constrained_indices = [i[i <> match] for i in self.constrained_indices] - #remove empty constraints - tmp = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)]) + # remove empty constraints + tmp = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)]) if tmp: - self.constrained_indices, self.constraints = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)]) + self.constrained_indices, self.constraints = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)]) self.constrained_indices, self.constraints = list(self.constrained_indices), list(self.constraints) # fixed: - self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices,values in zip(self.fixed_indices,self.fixed_values)] + self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices, values in zip(self.fixed_indices, self.fixed_values)] self.fixed_indices = [np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices in self.fixed_indices] # remove empty elements @@ -189,7 +187,7 @@ class parameterised(object): """ Set positive constraints. """ self.constrain(regexp, transformations.logexp()) - def constrain_bounded(self, regexp,lower, upper): + def constrain_bounded(self, regexp, lower, upper): """ Set bounded constraints. """ self.constrain(regexp, transformations.logistic(lower, upper)) @@ -199,8 +197,8 @@ class parameterised(object): else: return np.empty(shape=(0,)) - def constrain(self,regexp,transform): - assert isinstance(transform,transformations.transformation) + def constrain(self, regexp, transform): + assert isinstance(transform, transformations.transformation) matches = self.grep_param_names(regexp) overlap = set(matches).intersection(set(self.all_constrained_indices())) @@ -251,7 +249,7 @@ class parameterised(object): def _get_params_transformed(self): """use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed""" x = self._get_params() - [np.put(x,i,t.finv(x[i])) for i,t in zip(self.constrained_indices,self.constraints)] + [np.put(x, i, t.finv(x[i])) for i, t in zip(self.constrained_indices, self.constraints)] to_remove = self.fixed_indices + [t[1:] for t in self.tied_indices] if len(to_remove): @@ -263,7 +261,7 @@ class parameterised(object): """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params""" self._set_params(self._untransform_params(x)) - def _untransform_params(self,x): + def _untransform_params(self, x): """ The transformation required for _set_params_transformed. @@ -290,9 +288,9 @@ class parameterised(object): [np.put(xx, i, v) for i, v in zip(self.fixed_indices, self.fixed_values)] [np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ] - [np.put(xx,i,t.f(xx[i])) for i,t in zip(self.constrained_indices, self.constraints)] - if hasattr(self,'debug'): - stop + [np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)] + if hasattr(self, 'debug'): + stop # @UndefinedVariable return xx @@ -316,7 +314,7 @@ class parameterised(object): remove = np.hstack((remove, np.hstack(self.fixed_indices))) # add markers to show that some variables are constrained - for i,t in zip(self.constrained_indices,self.constraints): + for i, t in zip(self.constrained_indices, self.constraints): for ii in i: n[ii] = n[ii] + t.__str__() @@ -333,10 +331,10 @@ class parameterised(object): if not N: return "This object has no free parameters." header = ['Name', 'Value', 'Constraints', 'Ties'] - values = self._get_params() # map(str,self._get_params()) + values = self._get_params() # map(str,self._get_params()) # sort out the constraints constraints = [''] * len(names) - for i,t in zip(self.constrained_indices,self.constraints): + for i, t in zip(self.constrained_indices, self.constraints): for ii in i: constraints[ii] = t.__str__() for i in self.fixed_indices: @@ -354,7 +352,7 @@ class parameterised(object): max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])]) max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])]) cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4 - columns = cols.sum() + # columns = cols.sum() header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))] header_string = map(lambda x: '|'.join(x), [header_string]) diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index 5ac9de9d..2cfc8ae4 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -13,13 +13,13 @@ class SparseGP(GPBase): Variational sparse GP model :param X: inputs - :type X: np.ndarray (N x input_dim) + :type X: np.ndarray (num_data x input_dim) :param likelihood: a likelihood instance, containing the observed data :type likelihood: GPy.likelihood.(Gaussian | EP | Laplace) :param kernel : the kernel (covariance function). See link kernels :type kernel: a GPy.kern.kern instance :param X_variance: The uncertainty in the measurements of X (Gaussian variance) - :type X_variance: np.ndarray (N x input_dim) | None + :type X_variance: np.ndarray (num_data x input_dim) | None :param Z: inducing inputs (optional, see note) :type Z: np.ndarray (num_inducing x input_dim) | None :param num_inducing : Number of inducing points (optional, default 10. Ignored if Z is not None) @@ -69,7 +69,7 @@ class SparseGP(GPBase): # The rather complex computations of self.A if self.has_uncertain_inputs: if self.likelihood.is_heteroscedastic: - psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.N, 1, 1))).sum(0) + psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0) else: psi2_beta = self.psi2.sum(0) * self.likelihood.precision evals, evecs = linalg.eigh(psi2_beta) @@ -77,7 +77,7 @@ class SparseGP(GPBase): tmp = evecs * np.sqrt(clipped_evals) else: if self.likelihood.is_heteroscedastic: - tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.N))) + tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.num_data))) else: tmp = self.psi1 * (np.sqrt(self.likelihood.precision)) tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1) @@ -99,28 +99,28 @@ class SparseGP(GPBase): # Compute dL_dKmm tmp = tdot(self._LBi_Lmi_psi1V) - self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.input_dim * np.eye(self.num_inducing) + tmp) + self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp) tmp = -0.5 * self.DBi_plus_BiPBi - tmp += -0.5 * self.B * self.input_dim - tmp += self.input_dim * np.eye(self.num_inducing) + tmp += -0.5 * self.B * self.output_dim + tmp += self.output_dim * np.eye(self.num_inducing) self.dL_dKmm = backsub_both_sides(self.Lm, tmp) # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertain inputs case - self.dL_dpsi0 = -0.5 * self.input_dim * (self.likelihood.precision * np.ones([self.N, 1])).flatten() + self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten() self.dL_dpsi1 = np.dot(self.Cpsi1V, self.likelihood.V.T) - dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.input_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi) + dL_dpsi2_beta = 0.5 * backsub_both_sides(self.Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi) if self.likelihood.is_heteroscedastic: if self.has_uncertain_inputs: self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :] else: - self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.N)) + self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, self.psi1 * self.likelihood.precision.reshape(1, self.num_data)) self.dL_dpsi2 = None else: dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta if self.has_uncertain_inputs: # repeat for each of the N psi_2 matrices - self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.N, axis=0) + self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0) else: # subsume back into psi1 (==Kmn) self.dL_dpsi1 += 2.*np.dot(dL_dpsi2, self.psi1) @@ -135,26 +135,26 @@ class SparseGP(GPBase): raise NotImplementedError, "heteroscedatic derivates not implemented" else: # likelihood is not heterscedatic - self.partial_for_likelihood = -0.5 * self.N * self.input_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 - self.partial_for_likelihood += 0.5 * self.input_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) + self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2 + self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self.A) * self.likelihood.precision) self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self.A * self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V))) def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ if self.likelihood.is_heteroscedastic: - A = -0.5 * self.N * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) + A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y) B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self.A)) else: - A = -0.5 * self.N * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT + A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) return A + B + C + D + self.likelihood.Z def _set_params(self, p): - self.Z = p[:self.num_inducing * self.output_dim].reshape(self.num_inducing, self.input_dim) - self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.Nparam]) - self.likelihood._set_params(p[self.Z.size + self.kern.Nparam:]) + self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim) + self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params]) + self.likelihood._set_params(p[self.Z.size + self.kern.num_params:]) self._compute_kernel_matrices() self._computations() diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 621a3749..ec6d2ca6 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -12,7 +12,7 @@ default_seed = np.random.seed(123344) def BGPLVM(seed=default_seed): N = 10 - M = 3 + num_inducing = 3 Q = 2 D = 4 # generate GPLVM-like data @@ -26,7 +26,7 @@ def BGPLVM(seed=default_seed): # k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001) # k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001) - m = GPy.models.BayesianGPLVM(Y, Q, kernel=k, M=M) + m = GPy.models.BayesianGPLVM(Y, Q, kernel=k, num_inducing=num_inducing) m.constrain_positive('(rbf|bias|noise|white|S)') # m.constrain_fixed('S', 1) @@ -62,7 +62,7 @@ def GPLVM_oil_100(optimize=True): m.plot_latent(labels=m.data_labels) return m -def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False): +def swiss_roll(optimize=True, N=1000, num_inducing=15, Q=4, sigma=.2, plot=False): from GPy.util.datasets import swiss_roll_generated from GPy.core.transformations import logexp_clipped @@ -100,11 +100,11 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False): S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2, - (1 - var), (1 - var))) + .001 - Z = np.random.permutation(X)[:M] + Z = np.random.permutation(X)[:num_inducing] kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2)) - m = BayesianGPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel) + m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m.data_colors = c m.data_t = t @@ -117,7 +117,7 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False): m.optimize('scg', messages=1) return m -def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k): +def BGPLVM_oil(optimize=True, N=100, Q=5, num_inducing=25, max_f_eval=4e3, plot=False, **k): np.random.seed(0) data = GPy.util.datasets.oil() from GPy.core.transformations import logexp_clipped @@ -128,7 +128,7 @@ def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k) Yn = Y - Y.mean(0) Yn /= Yn.std(0) - m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, M=M, **k) + m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k) m.data_labels = data['Y'][:N].argmax(axis=1) # m.constrain('variance|leng', logexp_clipped()) @@ -167,7 +167,7 @@ def oil_100(): -def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False): +def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): x = np.linspace(0, 4 * np.pi, N)[:, None] s1 = np.vectorize(lambda x: np.sin(x)) s2 = np.vectorize(lambda x: np.cos(x)) @@ -227,13 +227,13 @@ def bgplvm_simulation_matlab_compare(): Y = sim_data['Y'] S = sim_data['S'] mu = sim_data['mu'] - M, [_, Q] = 3, mu.shape + num_inducing, [_, Q] = 3, mu.shape from GPy.models import mrd from GPy import kern reload(mrd); reload(kern) k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) - m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k, + m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, # X=mu, # X_variance=S, _debug=False) @@ -247,8 +247,8 @@ def bgplvm_simulation(optimize='scg', plot=True, max_f_eval=2e4): # from GPy.core.transformations import logexp_clipped - D1, D2, D3, N, M, Q = 15, 8, 8, 100, 3, 5 - slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot) + D1, D2, D3, N, num_inducing, Q = 15, 8, 8, 100, 3, 5 + slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot) from GPy.models import mrd from GPy import kern @@ -258,7 +258,7 @@ def bgplvm_simulation(optimize='scg', Y = Ylist[0] k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q) - m = BayesianGPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True) + m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, _debug=True) # m.constrain('variance|noise', logexp_clipped()) m.ensure_default_constraints() m['noise'] = Y.var() / 100. @@ -275,8 +275,8 @@ def bgplvm_simulation(optimize='scg', return m def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): - D1, D2, D3, N, M, Q = 150, 200, 400, 500, 3, 7 - slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim) + D1, D2, D3, N, num_inducing, Q = 150, 200, 400, 500, 3, 7 + slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) from GPy.models import mrd from GPy import kern @@ -284,7 +284,7 @@ def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw): reload(mrd); reload(kern) k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) - m = mrd.MRD(Ylist, input_dim=Q, M=M, kernels=k, initx="", initz='permute', **kw) + m = mrd.MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) for i, Y in enumerate(Ylist): m['{}_noise'.format(i + 1)] = Y.var() / 100. @@ -312,7 +312,7 @@ def brendan_faces(): Yn /= Yn.std() m = GPy.models.GPLVM(Yn, Q) - # m = GPy.models.BayesianGPLVM(Yn, Q, M=100) + # m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=100) # optimize m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) @@ -376,16 +376,16 @@ def cmu_mocap(subject='35', motion=['01'], in_place=True): # X /= X.std(axis=0) # # Q = 10 -# M = 30 +# num_inducing = 30 # # kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) + GPy.kern.white(Q) -# m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, M=M) +# m = GPy.models.BayesianGPLVM(X, Q, kernel=kernel, num_inducing=num_inducing) # # m.scale_factor = 100.0 # m.constrain_positive('(white|noise|bias|X_variance|rbf_variance|rbf_length)') # from sklearn import cluster -# km = cluster.KMeans(M, verbose=10) +# km = cluster.KMeans(num_inducing, verbose=10) # Z = km.fit(m.X).cluster_centers_ -# # Z = GPy.util.misc.kmm_init(m.X, M) +# # Z = GPy.util.misc.kmm_init(m.X, num_inducing) # m.set('iip', Z) # m.set('bias', 1e-4) # # optimize diff --git a/GPy/examples/regression.py b/GPy/examples/regression.py index 1512e842..a683f6bb 100644 --- a/GPy/examples/regression.py +++ b/GPy/examples/regression.py @@ -10,16 +10,16 @@ import numpy as np import GPy -def toy_rbf_1d(max_nb_eval_optim=100): +def toy_rbf_1d(optimizer='tnc', max_nb_eval_optim=100): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d() - # create simple GP model + # create simple GP Model m = GPy.models.GPRegression(data['X'],data['Y']) # optimize m.ensure_default_constraints() - m.optimize(max_f_eval=max_nb_eval_optim) + m.optimize(optimizer, max_f_eval=max_nb_eval_optim) # plot m.plot() print(m) @@ -29,7 +29,7 @@ def rogers_girolami_olympics(optim_iters=100): """Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" data = GPy.util.datasets.rogers_girolami_olympics() - # create simple GP model + # create simple GP Model m = GPy.models.GPRegression(data['X'],data['Y']) #set the lengthscale to be something sensible (defaults to 1) @@ -48,7 +48,7 @@ def toy_rbf_1d_50(optim_iters=100): """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" data = GPy.util.datasets.toy_rbf_1d_50() - # create simple GP model + # create simple GP Model m = GPy.models.GPRegression(data['X'],data['Y']) # optimize @@ -64,7 +64,7 @@ def silhouette(optim_iters=100): """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper.""" data = GPy.util.datasets.silhouette() - # create simple GP model + # create simple GP Model m = GPy.models.GPRegression(data['X'],data['Y']) # optimize @@ -151,8 +151,8 @@ def coregionalisation_sparse(optim_iters=100): Y2 = -np.sin(X2) + np.random.randn(*X2.shape)*0.05 Y = np.vstack((Y1,Y2)) - M = 40 - Z = np.hstack((np.random.rand(M,1)*8,np.random.randint(0,2,M)[:,None])) + num_inducing = 40 + Z = np.hstack((np.random.rand(num_inducing,1)*8,np.random.randint(0,2,num_inducing)[:,None])) k1 = GPy.kern.rbf(1) k2 = GPy.kern.Coregionalise(2,2) @@ -244,24 +244,24 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): lls = [] total_var = np.var(data['Y']) kernel = kernel_call(1, variance=1., lengthscale=1.) - model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel) + Model = GPy.models.GPRegression(data['X'], data['Y'], kernel=kernel) for log_SNR in log_SNRs: SNR = 10.**log_SNR noise_var = total_var/(1.+SNR) signal_var = total_var - noise_var - model.kern['.*variance'] = signal_var - model['noise_variance'] = noise_var + Model.kern['.*variance'] = signal_var + Model['noise_variance'] = noise_var length_scale_lls = [] for length_scale in length_scales: - model['.*lengthscale'] = length_scale - length_scale_lls.append(model.log_likelihood()) + Model['.*lengthscale'] = length_scale + length_scale_lls.append(Model.log_likelihood()) lls.append(length_scale_lls) return np.array(lls) -def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100): +def sparse_GP_regression_1D(N = 400, num_inducing = 5, optim_iters=100): """Run a 1D example of a sparse GP regression.""" # sample inputs and outputs X = np.random.uniform(-3.,3.,(N,1)) @@ -270,8 +270,8 @@ def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100): rbf = GPy.kern.rbf(1) noise = GPy.kern.white(1) kernel = rbf + noise - # create simple GP model - m = GPy.models.SparseGPRegression(X, Y, kernel, M=M) + # create simple GP Model + m = GPy.models.SparseGPRegression(X, Y, kernel, num_inducing=num_inducing) m.ensure_default_constraints() @@ -280,7 +280,7 @@ def sparse_GP_regression_1D(N = 400, M = 5, optim_iters=100): m.plot() return m -def sparse_GP_regression_2D(N = 400, M = 50, optim_iters=100): +def sparse_GP_regression_2D(N = 400, num_inducing = 50, optim_iters=100): """Run a 2D example of a sparse GP regression.""" X = np.random.uniform(-3.,3.,(N,2)) Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(N,1)*0.05 @@ -290,8 +290,8 @@ def sparse_GP_regression_2D(N = 400, M = 50, optim_iters=100): noise = GPy.kern.white(2) kernel = rbf + noise - # create simple GP model - m = GPy.models.SparseGPRegression(X,Y,kernel, M = M) + # create simple GP Model + m = GPy.models.SparseGPRegression(X,Y,kernel, num_inducing = num_inducing) # contrain all parameters to be positive (but not inducing inputs) m.ensure_default_constraints() @@ -318,7 +318,7 @@ def uncertain_inputs_sparse_regression(optim_iters=100): k = GPy.kern.rbf(1) + GPy.kern.white(1) - # create simple GP model - no input uncertainty on this one + # create simple GP Model - no input uncertainty on this one m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=optim_iters) @@ -326,7 +326,7 @@ def uncertain_inputs_sparse_regression(optim_iters=100): axes[0].set_title('no input uncertainty') - #the same model with uncertainty + #the same Model with uncertainty m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S) m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=optim_iters) diff --git a/GPy/examples/tutorials.py b/GPy/examples/tutorials.py index bd403ae8..2eb2acfb 100644 --- a/GPy/examples/tutorials.py +++ b/GPy/examples/tutorials.py @@ -33,7 +33,7 @@ def tuto_GP_regression(): m.optimize() - m.optimize_restarts(Nrestarts = 10) + m.optimize_restarts(num_restarts = 10) ########################### # 2-dimensional example # diff --git a/GPy/inference/sgd.py b/GPy/inference/sgd.py index 2df09ec8..0002bb22 100644 --- a/GPy/inference/sgd.py +++ b/GPy/inference/sgd.py @@ -11,17 +11,17 @@ class opt_SGD(Optimizer): Optimize using stochastic gradient descent. *** Parameters *** - model: reference to the model object + Model: reference to the Model object iterations: number of iterations learning_rate: learning rate momentum: momentum """ - def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs): + def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, Model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs): self.opt_name = "Stochastic Gradient Descent" - self.model = model + self.Model = Model self.iterations = iterations self.momentum = momentum self.learning_rate = learning_rate @@ -42,17 +42,17 @@ class opt_SGD(Optimizer): self.learning_rate_0 = self.learning_rate.mean() self.schedule = schedule - # if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1: + # if len([p for p in self.Model.kern.parts if p.name == 'bias']) == 1: # self.param_traces.append(('bias',[])) - # if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1: + # if len([p for p in self.Model.kern.parts if p.name == 'linear']) == 1: # self.param_traces.append(('linear',[])) - # if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1: + # if len([p for p in self.Model.kern.parts if p.name == 'rbf']) == 1: # self.param_traces.append(('rbf_var',[])) self.param_traces = dict(self.param_traces) self.fopt_trace = [] - num_params = len(self.model._get_params()) + num_params = len(self.Model._get_params()) if isinstance(self.learning_rate, float): self.learning_rate = np.ones((num_params,)) * self.learning_rate @@ -84,7 +84,7 @@ class opt_SGD(Optimizer): return (np.isnan(data).sum(axis=1) == 0) def check_for_missing(self, data): - if sp.sparse.issparse(self.model.likelihood.Y): + if sp.sparse.issparse(self.Model.likelihood.Y): return True else: return np.isnan(data).sum() > 0 @@ -107,32 +107,32 @@ class opt_SGD(Optimizer): def shift_constraints(self, j): - constrained_indices = copy.deepcopy(self.model.constrained_indices) + constrained_indices = copy.deepcopy(self.Model.constrained_indices) for c, constraint in enumerate(constrained_indices): mask = (np.ones_like(constrained_indices[c]) == 1) for i in range(len(constrained_indices[c])): pos = np.where(j == constrained_indices[c][i])[0] if len(pos) == 1: - self.model.constrained_indices[c][i] = pos + self.Model.constrained_indices[c][i] = pos else: mask[i] = False - self.model.constrained_indices[c] = self.model.constrained_indices[c][mask] + self.Model.constrained_indices[c] = self.Model.constrained_indices[c][mask] return constrained_indices # back them up - # bounded_i = copy.deepcopy(self.model.constrained_bounded_indices) - # bounded_l = copy.deepcopy(self.model.constrained_bounded_lowers) - # bounded_u = copy.deepcopy(self.model.constrained_bounded_uppers) + # bounded_i = copy.deepcopy(self.Model.constrained_bounded_indices) + # bounded_l = copy.deepcopy(self.Model.constrained_bounded_lowers) + # bounded_u = copy.deepcopy(self.Model.constrained_bounded_uppers) # for b in range(len(bounded_i)): # for each group of constraints # for bc in range(len(bounded_i[b])): # pos = np.where(j == bounded_i[b][bc])[0] # if len(pos) == 1: - # pos2 = np.where(self.model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0] - # self.model.constrained_bounded_indices[b][pos2] = pos[0] + # pos2 = np.where(self.Model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0] + # self.Model.constrained_bounded_indices[b][pos2] = pos[0] # else: - # if len(self.model.constrained_bounded_indices[b]) == 1: + # if len(self.Model.constrained_bounded_indices[b]) == 1: # # if it's the last index to be removed # # the logic here is just a mess. If we remove the last one, then all the # # b-indices change and we have to iterate through everything to find our @@ -140,35 +140,35 @@ class opt_SGD(Optimizer): # raise NotImplementedError # else: # just remove it from the indices - # mask = self.model.constrained_bounded_indices[b] != bc - # self.model.constrained_bounded_indices[b] = self.model.constrained_bounded_indices[b][mask] + # mask = self.Model.constrained_bounded_indices[b] != bc + # self.Model.constrained_bounded_indices[b] = self.Model.constrained_bounded_indices[b][mask] # # here we shif the positive constraints. We cycle through each positive # # constraint - # positive = self.model.constrained_positive_indices.copy() + # positive = self.Model.constrained_positive_indices.copy() # mask = (np.ones_like(positive) == 1) # for p in range(len(positive)): # # we now check whether the constrained index appears in the j vector # # (the vector of the "active" indices) - # pos = np.where(j == self.model.constrained_positive_indices[p])[0] + # pos = np.where(j == self.Model.constrained_positive_indices[p])[0] # if len(pos) == 1: - # self.model.constrained_positive_indices[p] = pos + # self.Model.constrained_positive_indices[p] = pos # else: # mask[p] = False - # self.model.constrained_positive_indices = self.model.constrained_positive_indices[mask] + # self.Model.constrained_positive_indices = self.Model.constrained_positive_indices[mask] # return (bounded_i, bounded_l, bounded_u), positive def restore_constraints(self, c):#b, p): - # self.model.constrained_bounded_indices = b[0] - # self.model.constrained_bounded_lowers = b[1] - # self.model.constrained_bounded_uppers = b[2] - # self.model.constrained_positive_indices = p - self.model.constrained_indices = c + # self.Model.constrained_bounded_indices = b[0] + # self.Model.constrained_bounded_lowers = b[1] + # self.Model.constrained_bounded_uppers = b[2] + # self.Model.constrained_positive_indices = p + self.Model.constrained_indices = c def get_param_shapes(self, N = None, input_dim = None): - model_name = self.model.__class__.__name__ + model_name = self.Model.__class__.__name__ if model_name == 'GPLVM': return [(N, input_dim)] if model_name == 'Bayesian_GPLVM': @@ -179,37 +179,37 @@ class opt_SGD(Optimizer): def step_with_missing_data(self, f_fp, X, step, shapes): N, input_dim = X.shape - if not sp.sparse.issparse(self.model.likelihood.Y): - Y = self.model.likelihood.Y - samples = self.non_null_samples(self.model.likelihood.Y) - self.model.N = samples.sum() + if not sp.sparse.issparse(self.Model.likelihood.Y): + Y = self.Model.likelihood.Y + samples = self.non_null_samples(self.Model.likelihood.Y) + self.Model.N = samples.sum() Y = Y[samples] else: - samples = self.model.likelihood.Y.nonzero()[0] - self.model.N = len(samples) - Y = np.asarray(self.model.likelihood.Y[samples].todense(), dtype = np.float64) + samples = self.Model.likelihood.Y.nonzero()[0] + self.Model.N = len(samples) + Y = np.asarray(self.Model.likelihood.Y[samples].todense(), dtype = np.float64) - if self.model.N == 0 or Y.std() == 0.0: - return 0, step, self.model.N + if self.Model.N == 0 or Y.std() == 0.0: + return 0, step, self.Model.N - self.model.likelihood._offset = Y.mean() - self.model.likelihood._scale = Y.std() - self.model.likelihood.set_data(Y) - # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision + self.Model.likelihood._offset = Y.mean() + self.Model.likelihood._scale = Y.std() + self.Model.likelihood.set_data(Y) + # self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision - sigma = self.model.likelihood._variance - self.model.likelihood._variance = None # invalidate cache - self.model.likelihood._set_params(sigma) + sigma = self.Model.likelihood._variance + self.Model.likelihood._variance = None # invalidate cache + self.Model.likelihood._set_params(sigma) j = self.subset_parameter_vector(self.x_opt, samples, shapes) - self.model.X = X[samples] + self.Model.X = X[samples] - model_name = self.model.__class__.__name__ + model_name = self.Model.__class__.__name__ if model_name == 'Bayesian_GPLVM': - self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) - self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT) + self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T) + self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT) ci = self.shift_constraints(j) f, fp = f_fp(self.x_opt[j]) @@ -218,18 +218,18 @@ class opt_SGD(Optimizer): self.x_opt[j] -= step[j] self.restore_constraints(ci) - self.model.grads[j] = fp + self.Model.grads[j] = fp # restore likelihood _offset and _scale, otherwise when we call set_data(y) on # the next feature, it will get normalized with the mean and std of this one. - self.model.likelihood._offset = 0 - self.model.likelihood._scale = 1 + self.Model.likelihood._offset = 0 + self.Model.likelihood._scale = 1 - return f, step, self.model.N + return f, step, self.Model.N def adapt_learning_rate(self, t, D): if self.learning_rate_adaptation == 'adagrad': if t > 0: - g_k = self.model.grads + g_k = self.Model.grads self.s_k += np.square(g_k) t0 = 100.0 self.learning_rate = 0.1/(t0 + np.sqrt(self.s_k)) @@ -245,8 +245,8 @@ class opt_SGD(Optimizer): elif self.learning_rate_adaptation == 'semi_pesky': - if self.model.__class__.__name__ == 'Bayesian_GPLVM': - g_t = self.model.grads + if self.Model.__class__.__name__ == 'Bayesian_GPLVM': + g_t = self.Model.grads if t == 0: self.hbar_t = 0.0 self.tau_t = 100.0 @@ -259,28 +259,28 @@ class opt_SGD(Optimizer): def opt(self, f_fp=None, f=None, fp=None): - self.x_opt = self.model._get_params_transformed() + self.x_opt = self.Model._get_params_transformed() self.grads = [] - X, Y = self.model.X.copy(), self.model.likelihood.Y.copy() + X, Y = self.Model.X.copy(), self.Model.likelihood.Y.copy() - self.model.likelihood.YYT = 0 - self.model.likelihood.trYYT = 0 - self.model.likelihood._offset = 0.0 - self.model.likelihood._scale = 1.0 + self.Model.likelihood.YYT = 0 + self.Model.likelihood.trYYT = 0 + self.Model.likelihood._offset = 0.0 + self.Model.likelihood._scale = 1.0 - N, input_dim = self.model.X.shape - D = self.model.likelihood.Y.shape[1] - num_params = self.model._get_params() + N, input_dim = self.Model.X.shape + D = self.Model.likelihood.Y.shape[1] + num_params = self.Model._get_params() self.trace = [] - missing_data = self.check_for_missing(self.model.likelihood.Y) + missing_data = self.check_for_missing(self.Model.likelihood.Y) step = np.zeros_like(num_params) for it in range(self.iterations): if self.actual_iter != None: it = self.actual_iter - self.model.grads = np.zeros_like(self.x_opt) # TODO this is ugly + self.Model.grads = np.zeros_like(self.x_opt) # TODO this is ugly if it == 0 or self.self_paced is False: features = np.random.permutation(Y.shape[1]) @@ -292,29 +292,29 @@ class opt_SGD(Optimizer): NLL = [] import pylab as plt for count, j in enumerate(features): - self.model.input_dim = len(j) - self.model.likelihood.input_dim = len(j) - self.model.likelihood.set_data(Y[:, j]) - # self.model.likelihood.V = self.model.likelihood.Y*self.model.likelihood.precision + self.Model.input_dim = len(j) + self.Model.likelihood.input_dim = len(j) + self.Model.likelihood.set_data(Y[:, j]) + # self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision - sigma = self.model.likelihood._variance - self.model.likelihood._variance = None # invalidate cache - self.model.likelihood._set_params(sigma) + sigma = self.Model.likelihood._variance + self.Model.likelihood._variance = None # invalidate cache + self.Model.likelihood._set_params(sigma) if missing_data: shapes = self.get_param_shapes(N, input_dim) f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes) else: - self.model.likelihood.YYT = np.dot(self.model.likelihood.Y, self.model.likelihood.Y.T) - self.model.likelihood.trYYT = np.trace(self.model.likelihood.YYT) + self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T) + self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT) Nj = N f, fp = f_fp(self.x_opt) - self.model.grads = fp.copy() + self.Model.grads = fp.copy() step = self.momentum * step + self.learning_rate * fp self.x_opt -= step if self.messages == 2: - noise = self.model.likelihood._variance + noise = self.Model.likelihood._variance status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise) sys.stdout.write(status) sys.stdout.flush() @@ -328,19 +328,19 @@ class opt_SGD(Optimizer): # plt.plot(self.param_traces['noise']) # for k in self.param_traces.keys(): - # self.param_traces[k].append(self.model.get(k)[0]) - self.grads.append(self.model.grads.tolist()) + # self.param_traces[k].append(self.Model.get(k)[0]) + self.grads.append(self.Model.grads.tolist()) # should really be a sum(), but earlier samples in the iteration will have a very crappy ll self.f_opt = np.mean(NLL) - self.model.N = N - self.model.X = X - self.model.input_dim = D - self.model.likelihood.N = N - self.model.likelihood.input_dim = D - self.model.likelihood.Y = Y - sigma = self.model.likelihood._variance - self.model.likelihood._variance = None # invalidate cache - self.model.likelihood._set_params(sigma) + self.Model.N = N + self.Model.X = X + self.Model.input_dim = D + self.Model.likelihood.N = N + self.Model.likelihood.input_dim = D + self.Model.likelihood.Y = Y + sigma = self.Model.likelihood._variance + self.Model.likelihood._variance = None # invalidate cache + self.Model.likelihood._set_params(sigma) self.trace.append(self.f_opt) if self.iteration_file is not None: diff --git a/GPy/kern/Brownian.py b/GPy/kern/Brownian.py index fe60ec59..76e103af 100644 --- a/GPy/kern/Brownian.py +++ b/GPy/kern/Brownian.py @@ -2,14 +2,14 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np def theta(x): """Heavisdie step function""" return np.where(x>=0.,1.,0.) -class Brownian(kernpart): +class Brownian(Kernpart): """ Brownian Motion kernel. @@ -21,7 +21,7 @@ class Brownian(kernpart): def __init__(self,input_dim,variance=1.): self.input_dim = input_dim assert self.input_dim==1, "Brownian motion in 1D only" - self.Nparam = 1. + self.num_params = 1. self.name = 'Brownian' self._set_params(np.array([variance]).flatten()) diff --git a/GPy/kern/Matern32.py b/GPy/kern/Matern32.py index 6204ca5e..60f0b6e9 100644 --- a/GPy/kern/Matern32.py +++ b/GPy/kern/Matern32.py @@ -2,13 +2,11 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np -import hashlib -from ..util.linalg import pdinv,mdot from scipy import integrate -class Matern32(kernpart): +class Matern32(Kernpart): """ Matern 3/2 kernel: @@ -28,11 +26,11 @@ class Matern32(kernpart): """ - def __init__(self,input_dim,variance=1.,lengthscale=None,ARD=False): + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False): self.input_dim = input_dim self.ARD = ARD if ARD == False: - self.Nparam = 2 + self.num_params = 2 self.name = 'Mat32' if lengthscale is not None: lengthscale = np.asarray(lengthscale) @@ -40,76 +38,76 @@ class Matern32(kernpart): else: lengthscale = np.ones(1) else: - self.Nparam = self.input_dim + 1 + self.num_params = self.input_dim + 1 self.name = 'Mat32' if lengthscale is not None: lengthscale = np.asarray(lengthscale) assert lengthscale.size == self.input_dim, "bad number of lengthscales" else: lengthscale = np.ones(self.input_dim) - self._set_params(np.hstack((variance,lengthscale.flatten()))) + self._set_params(np.hstack((variance, lengthscale.flatten()))) def _get_params(self): """return the value of the parameters.""" - return np.hstack((self.variance,self.lengthscale)) + return np.hstack((self.variance, self.lengthscale)) - def _set_params(self,x): + def _set_params(self, x): """set the value of the parameters.""" - assert x.size == self.Nparam + assert x.size == self.num_params self.variance = x[0] self.lengthscale = x[1:] def _get_param_names(self): """return parameter names.""" - if self.Nparam == 2: - return ['variance','lengthscale'] + if self.num_params == 2: + return ['variance', 'lengthscale'] else: - return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] + return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] - def K(self,X,X2,target): + def K(self, X, X2, target): """Compute the covariance matrix between X and X2.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) - np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target) + dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) + np.add(self.variance * (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist), target, target) - def Kdiag(self,X,target): + def Kdiag(self, X, target): """Compute the diagonal of the covariance matrix associated to X.""" - np.add(target,self.variance,target) + np.add(target, self.variance, target) - def dK_dtheta(self,dL_dK,X,X2,target): + def dK_dtheta(self, dL_dK, X, X2, target): """derivative of the covariance matrix with respect to the parameters.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) - dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist) - invdist = 1./np.where(dist!=0.,dist,np.inf) - dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 - #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - target[0] += np.sum(dvar*dL_dK) + dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1)) + dvar = (1 + np.sqrt(3.) * dist) * np.exp(-np.sqrt(3.) * dist) + invdist = 1. / np.where(dist != 0., dist, np.inf) + dist2M = np.square(X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 3 + # dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] + target[0] += np.sum(dvar * dL_dK) if self.ARD == True: - dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] - #dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] - target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0) + dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist))[:, :, np.newaxis] * dist2M * invdist[:, :, np.newaxis] + # dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] + target[1:] += (dl * dL_dK[:, :, None]).sum(0).sum(0) else: - dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist - #dl = self.variance*dvar*dist2M.sum(-1)*invdist - target[1] += np.sum(dl*dL_dK) + dl = (self.variance * 3 * dist * np.exp(-np.sqrt(3.) * dist)) * dist2M.sum(-1) * invdist + # dl = self.variance*dvar*dist2M.sum(-1)*invdist + target[1] += np.sum(dl * dL_dK) - def dKdiag_dtheta(self,dL_dKdiag,X,target): + def dKdiag_dtheta(self, dL_dKdiag, X, target): """derivative of the diagonal of the covariance matrix with respect to the parameters.""" target[0] += np.sum(dL_dKdiag) - def dK_dX(self,dL_dK,X,X2,target): + def dK_dX(self, dL_dK, X, X2, target): """derivative of the covariance matrix with respect to X.""" if X2 is None: X2 = X - dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] - ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) - dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2)) - target += np.sum(dK_dX*dL_dK.T[:,:,None],0) + dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None] + ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf) + dK_dX = -np.transpose(3 * self.variance * dist * np.exp(-np.sqrt(3) * dist) * ddist_dX, (1, 0, 2)) + target += np.sum(dK_dX * dL_dK.T[:, :, None], 0) - def dKdiag_dX(self,dL_dKdiag,X,target): + def dKdiag_dX(self, dL_dKdiag, X, target): pass - def Gram_matrix(self,F,F1,F2,lower,upper): + def Gram_matrix(self, F, F1, F2, lower, upper): """ Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1. @@ -123,15 +121,15 @@ class Matern32(kernpart): :type lower,upper: floats """ assert self.input_dim == 1 - def L(x,i): - return(3./self.lengthscale**2*F[i](x) + 2*np.sqrt(3)/self.lengthscale*F1[i](x) + F2[i](x)) + def L(x, i): + return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x)) n = F.shape[0] - G = np.zeros((n,n)) + G = np.zeros((n, n)) for i in range(n): - for j in range(i,n): - G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] - Flower = np.array([f(lower) for f in F])[:,None] - F1lower = np.array([f(lower) for f in F1])[:,None] - #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" - #return(G) - return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T)) + for j in range(i, n): + G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0] + Flower = np.array([f(lower) for f in F])[:, None] + F1lower = np.array([f(lower) for f in F1])[:, None] + # print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n" + # return(G) + return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T)) diff --git a/GPy/kern/Matern52.py b/GPy/kern/Matern52.py index c35715ee..e02cb9bf 100644 --- a/GPy/kern/Matern52.py +++ b/GPy/kern/Matern52.py @@ -2,12 +2,12 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib from scipy import integrate -class Matern52(kernpart): +class Matern52(Kernpart): """ Matern 5/2 kernel: @@ -30,7 +30,7 @@ class Matern52(kernpart): self.input_dim = input_dim self.ARD = ARD if ARD == False: - self.Nparam = 2 + self.num_params = 2 self.name = 'Mat52' if lengthscale is not None: lengthscale = np.asarray(lengthscale) @@ -38,7 +38,7 @@ class Matern52(kernpart): else: lengthscale = np.ones(1) else: - self.Nparam = self.input_dim + 1 + self.num_params = self.input_dim + 1 self.name = 'Mat52' if lengthscale is not None: lengthscale = np.asarray(lengthscale) @@ -53,13 +53,13 @@ class Matern52(kernpart): def _set_params(self,x): """set the value of the parameters.""" - assert x.size == self.Nparam + assert x.size == self.num_params self.variance = x[0] self.lengthscale = x[1:] def _get_param_names(self): """return parameter names.""" - if self.Nparam == 2: + if self.num_params == 2: return ['variance','lengthscale'] else: return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)] diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index a71d38f4..97c1d88f 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs +from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, symmetric, Coregionalise, rational_quadratic, Fixed, rbfcos, IndependentOutputs try: from constructors import rbf_sympy, sympykern # these depend on sympy except: diff --git a/GPy/kern/bias.py b/GPy/kern/bias.py index 9defbf95..8ec3741d 100644 --- a/GPy/kern/bias.py +++ b/GPy/kern/bias.py @@ -2,11 +2,11 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib -class bias(kernpart): +class bias(Kernpart): def __init__(self,input_dim,variance=1.): """ :param input_dim: the number of input dimensions @@ -15,7 +15,7 @@ class bias(kernpart): :type variance: float """ self.input_dim = input_dim - self.Nparam = 1 + self.num_params = 1 self.name = 'bias' self._set_params(np.array([variance]).flatten()) diff --git a/GPy/kern/constructors.py b/GPy/kern/constructors.py index a6ed1145..520c931b 100644 --- a/GPy/kern/constructors.py +++ b/GPy/kern/constructors.py @@ -12,7 +12,7 @@ from exponential import exponential as exponentialpart from Matern32 import Matern32 as Matern32part from Matern52 import Matern52 as Matern52part from bias import bias as biaspart -from fixed import fixed as fixedpart +from fixed import Fixed as fixedpart from finite_dimensional import finite_dimensional as finite_dimensionalpart from spline import spline as splinepart from Brownian import Brownian as Brownianpart @@ -24,7 +24,7 @@ from symmetric import symmetric as symmetric_part from coregionalise import Coregionalise as coregionalise_part from rational_quadratic import rational_quadratic as rational_quadraticpart from rbfcos import rbfcos as rbfcospart -from independent_outputs import independent_outputs as independent_output_part +from independent_outputs import IndependentOutputs as independent_output_part #TODO these s=constructors are not as clean as we'd like. Tidy the code up #using meta-classes to make the objects construct properly wthout them. @@ -294,9 +294,9 @@ def rational_quadratic(D,variance=1., lengthscale=1., power=1.): part = rational_quadraticpart(D,variance, lengthscale, power) return kern(D, [part]) -def fixed(D, K, variance=1.): +def Fixed(D, K, variance=1.): """ - Construct a fixed effect kernel. + Construct a Fixed effect kernel. Arguments --------- @@ -314,7 +314,7 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False): part = rbfcospart(D,variance,frequencies,bandwidths,ARD) return kern(D,[part]) -def independent_outputs(k): +def IndependentOutputs(k): """ Construct a kernel with independent outputs from an existing kernel """ diff --git a/GPy/kern/coregionalise.py b/GPy/kern/coregionalise.py index 7fe922fd..8faceafe 100644 --- a/GPy/kern/coregionalise.py +++ b/GPy/kern/coregionalise.py @@ -1,13 +1,13 @@ # Copyright (c) 2012, James Hensman and Ricardo Andrade # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np from GPy.util.linalg import mdot, pdinv import pdb from scipy import weave -class Coregionalise(kernpart): +class Coregionalise(Kernpart): """ Kernel for Intrinsic Corregionalization Models """ @@ -26,14 +26,14 @@ class Coregionalise(kernpart): else: assert kappa.shape==(self.Nout,) self.kappa = kappa - self.Nparam = self.Nout*(self.R + 1) + self.num_params = self.Nout*(self.R + 1) self._set_params(np.hstack([self.W.flatten(),self.kappa])) def _get_params(self): return np.hstack([self.W.flatten(),self.kappa]) def _set_params(self,x): - assert x.size == self.Nparam + assert x.size == self.num_params self.kappa = x[-self.Nout:] self.W = x[:-self.Nout].reshape(self.Nout,self.R) self.B = np.dot(self.W,self.W.T) + np.diag(self.kappa) @@ -69,14 +69,14 @@ class Coregionalise(kernpart): else: index2 = np.asarray(index2,dtype=np.int) code=""" - for(int i=0;i 1: self.tie_params(index) @@ -223,14 +222,14 @@ class kern(parameterised): def dK_dtheta(self, dL_dK, X, X2=None): """ :param dL_dK: An array of dL_dK derivaties, dL_dK - :type dL_dK: Np.ndarray (N x M) + :type dL_dK: Np.ndarray (N x num_inducing) :param X: Observed data inputs :type X: np.ndarray (N x input_dim) :param X2: Observed dara inputs (optional, defaults to X) - :type X2: np.ndarray (M x input_dim) + :type X2: np.ndarray (num_inducing x input_dim) """ assert X.shape[1] == self.input_dim - target = np.zeros(self.Nparam) + target = np.zeros(self.num_params) if X2 is None: [p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)] else: @@ -259,7 +258,7 @@ class kern(parameterised): def dKdiag_dtheta(self, dL_dKdiag, X): assert X.shape[1] == self.input_dim assert dL_dKdiag.size == X.shape[0] - target = np.zeros(self.Nparam) + target = np.zeros(self.num_params) [p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] return self._transform_gradients(target) @@ -275,7 +274,7 @@ class kern(parameterised): return target def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S): - target = np.zeros(self.Nparam) + target = np.zeros(self.num_params) [p.dpsi0_dtheta(dL_dpsi0, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] return self._transform_gradients(target) @@ -290,7 +289,7 @@ class kern(parameterised): return target def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S): - target = np.zeros((self.Nparam)) + target = np.zeros((self.num_params)) [p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)] return self._transform_gradients(target) @@ -300,16 +299,16 @@ class kern(parameterised): return target def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S): - """return shapes are N,M,input_dim""" + """return shapes are N,num_inducing,input_dim""" target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1])) [p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] return target_mu, target_S def psi2(self, Z, mu, S): """ - :param Z: np.ndarray of inducing inputs (M x input_dim) + :param Z: np.ndarray of inducing inputs (num_inducing x input_dim) :param mu, S: np.ndarrays of means and variances (each N x input_dim) - :returns psi2: np.ndarray (N,M,M) + :returns psi2: np.ndarray (N,num_inducing,num_inducing) """ target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0])) [p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)] @@ -327,13 +326,13 @@ class kern(parameterised): p2.psi1(Z, mu, S, tmp2) prod = np.multiply(tmp1, tmp2) - crossterms += prod[:,:,None] + prod[:, None, :] - + crossterms += prod[:, :, None] + prod[:, None, :] + target += crossterms return target def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S): - target = np.zeros(self.Nparam) + target = np.zeros(self.num_params) [p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)] # compute the "cross" terms @@ -345,14 +344,14 @@ class kern(parameterised): tmp = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dtheta((tmp[:,None,:]*dL_dpsi2).sum(1)*2., Z, mu, S, target[ps2]) + p2.dpsi1_dtheta((tmp[:, None, :] * dL_dpsi2).sum(1) * 2., Z, mu, S, target[ps2]) return self._transform_gradients(target) def dpsi2_dZ(self, dL_dpsi2, Z, mu, S): target = np.zeros_like(Z) [p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)] - #target *= 2 + # target *= 2 # compute the "cross" terms # TODO: we need input_slices here. @@ -362,7 +361,7 @@ class kern(parameterised): tmp = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, tmp) tmp2 = np.zeros_like(target) - p2.dpsi1_dZ((tmp[:,None,:]*dL_dpsi2).sum(1).T, Z, mu, S, tmp2) + p2.dpsi1_dZ((tmp[:, None, :] * dL_dpsi2).sum(1).T, Z, mu, S, tmp2) target += tmp2 return target * 2 @@ -379,7 +378,7 @@ class kern(parameterised): tmp = np.zeros((mu.shape[0], Z.shape[0])) p1.psi1(Z, mu, S, tmp) - p2.dpsi1_dmuS((tmp[:,None,:]*dL_dpsi2).sum(1).T*2., Z, mu, S, target_mu, target_S) + p2.dpsi1_dmuS((tmp[:, None, :] * dL_dpsi2).sum(1).T * 2., Z, mu, S, target_mu, target_S) return target_mu, target_S @@ -430,7 +429,7 @@ class kern(parameterised): Xnew = np.vstack((xx.flatten(), yy.flatten())).T Kx = self.K(Xnew, x, which_parts) Kx = Kx.reshape(resolution, resolution).T - pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) + pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs) # @UndefinedVariable pb.xlim(xmin[0], xmax[0]) pb.ylim(xmin[1], xmax[1]) pb.xlabel("x1") diff --git a/GPy/kern/kernpart.py b/GPy/kern/kernpart.py index a4dfdd92..2ed56d66 100644 --- a/GPy/kern/kernpart.py +++ b/GPy/kern/kernpart.py @@ -2,7 +2,7 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -class kernpart(object): +class Kernpart(object): def __init__(self,input_dim): """ The base class for a kernpart: a positive definite function which forms part of a kernel @@ -13,7 +13,7 @@ class kernpart(object): Do not instantiate. """ self.input_dim = input_dim - self.Nparam = 1 + self.num_params = 1 self.name = 'unnamed' def _get_params(self): diff --git a/GPy/kern/linear.py b/GPy/kern/linear.py index 425b81bb..b2aa29f9 100644 --- a/GPy/kern/linear.py +++ b/GPy/kern/linear.py @@ -2,12 +2,12 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np from ..util.linalg import tdot from scipy import weave -class linear(kernpart): +class linear(Kernpart): """ Linear kernel @@ -28,7 +28,7 @@ class linear(kernpart): self.input_dim = input_dim self.ARD = ARD if ARD == False: - self.Nparam = 1 + self.num_params = 1 self.name = 'linear' if variances is not None: variances = np.asarray(variances) @@ -37,7 +37,7 @@ class linear(kernpart): variances = np.ones(1) self._Xcache, self._X2cache = np.empty(shape=(2,)) else: - self.Nparam = self.input_dim + self.num_params = self.input_dim self.name = 'linear' if variances is not None: variances = np.asarray(variances) @@ -54,12 +54,12 @@ class linear(kernpart): return self.variances def _set_params(self, x): - assert x.size == (self.Nparam) + assert x.size == (self.num_params) self.variances = x self.variances2 = np.square(self.variances) def _get_param_names(self): - if self.Nparam == 1: + if self.num_params == 1: return ['variance'] else: return ['variance_%i' % i for i in range(self.variances.size)] @@ -138,7 +138,7 @@ class linear(kernpart): def psi2(self, Z, mu, S, target): """ - returns N,M,M matrix + returns N,num_inducing,num_inducing matrix """ self._psi_computations(Z, mu, S) # psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :] @@ -168,7 +168,7 @@ class linear(kernpart): target += tmp.sum() def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S): - """Think N,M,M,input_dim """ + """Think N,num_inducing,num_inducing,input_dim """ self._psi_computations(Z, mu, S) AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :] AZZA = AZZA + AZZA.swapaxes(1, 2) @@ -184,7 +184,7 @@ class linear(kernpart): double factor,tmp; #pragma omp parallel for private(m,mm,q,qq,factor,tmp) for(n=0;n' + k2.name self.k1 = k1 self.k2 = k2 @@ -40,8 +40,8 @@ class prod(kernpart): def _set_params(self,x): """set the value of the parameters.""" - self.k1._set_params(x[:self.k1.Nparam]) - self.k2._set_params(x[self.k1.Nparam:]) + self.k1._set_params(x[:self.k1.num_params]) + self.k2._set_params(x[self.k1.num_params:]) def _get_param_names(self): """return parameter names.""" @@ -55,11 +55,11 @@ class prod(kernpart): """derivative of the covariance matrix with respect to the parameters.""" self._K_computations(X,X2) if X2 is None: - self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.Nparam]) - self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], None, target[:self.k1.num_params]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], None, target[self.k1.num_params:]) else: - self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.Nparam]) - self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*self._K2, X[:,self.slice1], X2[:,self.slice1], target[:self.k1.num_params]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.slice2], X2[:,self.slice2], target[self.k1.num_params:]) def Kdiag(self,X,target): """Compute the diagonal of the covariance matrix associated to X.""" @@ -74,8 +74,8 @@ class prod(kernpart): K2 = np.zeros(X.shape[0]) self.k1.Kdiag(X[:,self.slice1],K1) self.k2.Kdiag(X[:,self.slice2],K2) - self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.Nparam]) - self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.Nparam:]) + self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,self.slice1],target[:self.k1.num_params]) + self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.slice2],target[self.k1.num_params:]) def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" diff --git a/GPy/kern/prod_orthogonal.py b/GPy/kern/prod_orthogonal.py index 0c0d5d98..237c9557 100644 --- a/GPy/kern/prod_orthogonal.py +++ b/GPy/kern/prod_orthogonal.py @@ -1,23 +1,23 @@ # Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib #from scipy import integrate # This may not be necessary (Nicolas, 20th Feb) -class prod_orthogonal(kernpart): +class prod_orthogonal(Kernpart): """ Computes the product of 2 kernels :param k1, k2: the kernels to multiply - :type k1, k2: kernpart + :type k1, k2: Kernpart :rtype: kernel object """ def __init__(self,k1,k2): self.input_dim = k1.input_dim + k2.input_dim - self.Nparam = k1.Nparam + k2.Nparam + self.num_params = k1.num_params + k2.num_params self.name = k1.name + '' + k2.name self.k1 = k1 self.k2 = k2 @@ -30,8 +30,8 @@ class prod_orthogonal(kernpart): def _set_params(self,x): """set the value of the parameters.""" - self.k1._set_params(x[:self.k1.Nparam]) - self.k2._set_params(x[self.k1.Nparam:]) + self.k1._set_params(x[:self.k1.num_params]) + self.k2._set_params(x[self.k1.num_params:]) def _get_param_names(self): """return parameter names.""" @@ -45,11 +45,11 @@ class prod_orthogonal(kernpart): """derivative of the covariance matrix with respect to the parameters.""" self._K_computations(X,X2) if X2 is None: - self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.Nparam]) - self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], None, target[:self.k1.num_params]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], None, target[self.k1.num_params:]) else: - self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.Nparam]) - self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.Nparam:]) + self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.input_dim], X2[:,:self.k1.input_dim], target[:self.k1.num_params]) + self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.input_dim:], X2[:,self.k1.input_dim:], target[self.k1.num_params:]) def Kdiag(self,X,target): """Compute the diagonal of the covariance matrix associated to X.""" @@ -64,8 +64,8 @@ class prod_orthogonal(kernpart): K2 = np.zeros(X.shape[0]) self.k1.Kdiag(X[:,:self.k1.input_dim],K1) self.k2.Kdiag(X[:,self.k1.input_dim:],K2) - self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.Nparam]) - self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.Nparam:]) + self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.input_dim],target[:self.k1.num_params]) + self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.input_dim:],target[self.k1.num_params:]) def dK_dX(self,dL_dK,X,X2,target): """derivative of the covariance matrix with respect to X.""" diff --git a/GPy/kern/rational_quadratic.py b/GPy/kern/rational_quadratic.py index a9139940..d1e7a7e3 100644 --- a/GPy/kern/rational_quadratic.py +++ b/GPy/kern/rational_quadratic.py @@ -2,10 +2,10 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np -class rational_quadratic(kernpart): +class rational_quadratic(Kernpart): """ rational quadratic kernel @@ -21,13 +21,13 @@ class rational_quadratic(kernpart): :type lengthscale: float :param power: the power :math:`\\alpha` :type power: float - :rtype: kernpart object + :rtype: Kernpart object """ def __init__(self,input_dim,variance=1.,lengthscale=1.,power=1.): assert input_dim == 1, "For this kernel we assume input_dim=1" self.input_dim = input_dim - self.Nparam = 3 + self.num_params = 3 self.name = 'rat_quad' self.variance = variance self.lengthscale = lengthscale diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index c71fefef..2d316234 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -2,13 +2,13 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib from scipy import weave from ..util.linalg import tdot -class rbf(kernpart): +class rbf(Kernpart): """ Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel: @@ -36,14 +36,14 @@ class rbf(kernpart): self.name = 'rbf' self.ARD = ARD if not ARD: - self.Nparam = 2 + self.num_params = 2 if lengthscale is not None: lengthscale = np.asarray(lengthscale) assert lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel" else: lengthscale = np.ones(1) else: - self.Nparam = self.input_dim + 1 + self.num_params = self.input_dim + 1 if lengthscale is not None: lengthscale = np.asarray(lengthscale) assert lengthscale.size == self.input_dim, "bad number of lengthscales" @@ -67,7 +67,7 @@ class rbf(kernpart): return np.hstack((self.variance, self.lengthscale)) def _set_params(self, x): - assert x.size == (self.Nparam) + assert x.size == (self.num_params) self.variance = x[0] self.lengthscale = x[1:] self.lengthscale2 = np.square(self.lengthscale) @@ -76,7 +76,7 @@ class rbf(kernpart): self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S def _get_param_names(self): - if self.Nparam == 2: + if self.num_params == 2: return ['variance', 'lengthscale'] else: return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)] @@ -110,7 +110,7 @@ class rbf(kernpart): target(q+1) += var_len3(q)*tmp; } """ - N, M, input_dim = X.shape[0], X.shape[0], self.input_dim + N, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim else: code = """ int q,i,j; @@ -118,16 +118,16 @@ class rbf(kernpart): for(q=0; q """ weave.inline(code, support_code=support_code, libraries=['gomp'], - arg_names=['N', 'M', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], + arg_names=['N','num_inducing','input_dim','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'], type_converters=weave.converters.blitz, **self.weave_options) return mudist, mudist_sq, psi2_exponent, psi2 diff --git a/GPy/kern/rbfcos.py b/GPy/kern/rbfcos.py index 047dad0b..b1e99d3c 100644 --- a/GPy/kern/rbfcos.py +++ b/GPy/kern/rbfcos.py @@ -3,10 +3,10 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np -class rbfcos(kernpart): +class rbfcos(Kernpart): def __init__(self,input_dim,variance=1.,frequencies=None,bandwidths=None,ARD=False): self.input_dim = input_dim self.name = 'rbfcos' @@ -14,9 +14,9 @@ class rbfcos(kernpart): print "Warning: the rbfcos kernel requires a lot of memory for high dimensional inputs" self.ARD = ARD - #set the default frequencies and bandwidths, appropriate Nparam + #set the default frequencies and bandwidths, appropriate num_params if ARD: - self.Nparam = 2*self.input_dim + 1 + self.num_params = 2*self.input_dim + 1 if frequencies is not None: frequencies = np.asarray(frequencies) assert frequencies.size == self.input_dim, "bad number of frequencies" @@ -28,7 +28,7 @@ class rbfcos(kernpart): else: bandwidths = np.ones(self.input_dim) else: - self.Nparam = 3 + self.num_params = 3 if frequencies is not None: frequencies = np.asarray(frequencies) assert frequencies.size == 1, "Exactly one frequency needed for non-ARD kernel" @@ -51,7 +51,7 @@ class rbfcos(kernpart): return np.hstack((self.variance,self.frequencies, self.bandwidths)) def _set_params(self,x): - assert x.size==(self.Nparam) + assert x.size==(self.num_params) if self.ARD: self.variance = x[0] self.frequencies = x[1:1+self.input_dim] @@ -60,7 +60,7 @@ class rbfcos(kernpart): self.variance, self.frequencies, self.bandwidths = x def _get_param_names(self): - if self.Nparam == 3: + if self.num_params == 3: return ['variance','frequency','bandwidth'] else: return ['variance']+['frequency_%i'%i for i in range(self.input_dim)]+['bandwidth_%i'%i for i in range(self.input_dim)] @@ -106,7 +106,7 @@ class rbfcos(kernpart): self._dist2 = np.square(self._dist) #ensure the next section is computed: - self._params = np.empty(self.Nparam) + self._params = np.empty(self.num_params) if not np.all(self._params == self._get_params()): self._params == self._get_params().copy() diff --git a/GPy/kern/spline.py b/GPy/kern/spline.py index fbfc7573..f2802180 100644 --- a/GPy/kern/spline.py +++ b/GPy/kern/spline.py @@ -2,14 +2,14 @@ # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np import hashlib def theta(x): """Heaviside step function""" return np.where(x>=0.,1.,0.) -class spline(kernpart): +class spline(Kernpart): """ Spline kernel @@ -23,7 +23,7 @@ class spline(kernpart): def __init__(self,input_dim,variance=1.,lengthscale=1.): self.input_dim = input_dim assert self.input_dim==1 - self.Nparam = 1 + self.num_params = 1 self.name = 'spline' self._set_params(np.squeeze(variance)) diff --git a/GPy/kern/symmetric.py b/GPy/kern/symmetric.py index be26e6db..c7099a6f 100644 --- a/GPy/kern/symmetric.py +++ b/GPy/kern/symmetric.py @@ -1,18 +1,18 @@ # Copyright (c) 2012 James Hensman # Licensed under the BSD 3-clause license (see LICENSE.txt) -from kernpart import kernpart +from kernpart import Kernpart import numpy as np -class symmetric(kernpart): +class symmetric(Kernpart): """ Symmetrical kernels :param k: the kernel to symmetrify - :type k: kernpart + :type k: Kernpart :param transform: the transform to use in symmetrification (allows symmetry on specified axes) :type transform: A numpy array (input_dim x input_dim) specifiying the transform - :rtype: kernpart + :rtype: Kernpart """ def __init__(self,k,transform=None): @@ -21,7 +21,7 @@ class symmetric(kernpart): assert transform.shape == (k.input_dim, k.input_dim) self.transform = transform self.input_dim = k.input_dim - self.Nparam = k.Nparam + self.num_params = k.num_params self.name = k.name + '_symm' self.k = k self._set_params(k._get_params()) diff --git a/GPy/kern/sympykern.py b/GPy/kern/sympykern.py index 5c55f0db..def1bc5f 100644 --- a/GPy/kern/sympykern.py +++ b/GPy/kern/sympykern.py @@ -9,9 +9,9 @@ import sys current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) import tempfile import pdb -from kernpart import kernpart +from kernpart import Kernpart -class spkern(kernpart): +class spkern(Kernpart): """ A kernel object, where all the hard work in done by sympy. @@ -38,12 +38,12 @@ class spkern(kernpart): self.input_dim = len(self._sp_x) assert self.input_dim == input_dim self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name) - self.Nparam = len(self._sp_theta) + self.num_params = len(self._sp_theta) #deal with param if param is None: - param = np.ones(self.Nparam) - assert param.size==self.Nparam + param = np.ones(self.num_params) + assert param.size==self.num_params self._set_params(param) #Differentiate! @@ -115,19 +115,19 @@ class spkern(kernpart): #Here's some code to do the looping for K arglist = ", ".join(["X[i*input_dim+%s]"%x.name[1:] for x in self._sp_x]\ + ["Z[j*input_dim+%s]"%z.name[1:] for z in self._sp_z]\ - + ["param[%i]"%i for i in range(self.Nparam)]) + + ["param[%i]"%i for i in range(self.num_params)]) self._K_code =\ """ int i; int j; int N = target_array->dimensions[0]; - int M = target_array->dimensions[1]; + int num_inducing = target_array->dimensions[1]; int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) for (i=0;idimensions[0]; - int M = partial_array->dimensions[1]; + int num_inducing = partial_array->dimensions[1]; int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) for (i=0;idimensions[0]; - int M = partial_array->dimensions[1]; + int num_inducing = partial_array->dimensions[1]; int input_dim = X_array->dimensions[1]; //#pragma omp parallel for private(j) for (i=0;idimensions[0]; - int M = 0; + int num_inducing = 0; int input_dim = X_array->dimensions[1]; for (i=0;i>>>>>> 7040b26f41f382edfdca3d3f7b689b9bbfc1a54f:GPy/models/generalized_fitc.py +from scipy import linalg +from GPy.core.sparse_gp import SparseGP +from GPy.util.linalg import mdot -def backsub_both_sides(L,X): +def backsub_both_sides(L, X): """ Return L^-T * X * L^-1, assumuing X is symmetrical and L is lower cholesky""" - tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1) - return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T + tmp, _ = linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(X), lower=1, trans=1) + return linalg.lapack.flapack.dtrtrs(L, np.asfortranarray(tmp.T), lower=1, trans=1)[0].T class GeneralizedFITC(SparseGP): @@ -45,17 +38,13 @@ class GeneralizedFITC(SparseGP): self.num_inducing = self.Z.shape[0] self.true_precision = likelihood.precision -<<<<<<< HEAD:GPy/models/generalized_FITC.py - sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False) -======= super(GeneralizedFITC, self).__init__(X, likelihood, kernel=kernel, Z=self.Z, X_variance=X_variance, normalize_X=normalize_X) self._set_params(self._get_params()) ->>>>>>> 7040b26f41f382edfdca3d3f7b689b9bbfc1a54f:GPy/models/generalized_fitc.py def _set_params(self, p): - self.Z = p[:self.num_inducing*self.input_dim].reshape(self.num_inducing, self.input_dim) - self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam]) - self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:]) + self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim) + self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params]) + self.likelihood._set_params(p[self.Z.size + self.kern.num_params:]) self._compute_kernel_matrices() self._computations() self._FITC_computations() @@ -73,9 +62,9 @@ class GeneralizedFITC(SparseGP): if self.has_uncertain_inputs: raise NotImplementedError, "FITC approximation not implemented for uncertain inputs" else: - self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) + self.likelihood.fit_FITC(self.Kmm, self.psi1, self.psi0) self.true_precision = self.likelihood.precision # Save the true precision - self.likelihood.precision = self.true_precision/(1. + self.true_precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation + self.likelihood.precision = self.true_precision / (1. + self.true_precision * self.Diag0[:, None]) # Add the diagonal element of the FITC approximation self._set_params(self._get_params()) # update the GP def _FITC_computations(self): @@ -87,37 +76,37 @@ class GeneralizedFITC(SparseGP): - removes the extra terms computed in the SparseGP approximation - computes the likelihood gradients wrt the true precision. """ - #NOTE the true precison is now 'true_precision' not 'precision' + # NOTE the true precison is now 'true_precision' not 'precision' if self.likelihood.is_heteroscedastic: # Compute generalized FITC's diagonal term of the covariance - self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1) - Lmipsi1 = np.dot(self.Lmi,self.psi1) - self.Qnn = np.dot(Lmipsi1.T,Lmipsi1) - #self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) - #self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) - #a = kj + self.Lmi, info = linalg.lapack.flapack.dtrtrs(self.Lm, np.eye(self.num_inducing), lower=1) + Lmipsi1 = np.dot(self.Lmi, self.psi1) + self.Qnn = np.dot(Lmipsi1.T, Lmipsi1) + # self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) + # self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1) + # a = kj self.Diag0 = self.psi0 - np.diag(self.Qnn) - Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.true_precision.flatten()) + Iplus_Dprod_i = 1. / (1. + self.Diag0 * self.true_precision.flatten()) self.Diag = self.Diag0 * Iplus_Dprod_i - self.P = Iplus_Dprod_i[:,None] * self.psi1.T - self.RPT0 = np.dot(self.Lmi,self.psi1) - self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T)) - self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1) - self.RPT = np.dot(self.R,self.P.T) - self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT) + self.P = Iplus_Dprod_i[:, None] * self.psi1.T + self.RPT0 = np.dot(self.Lmi, self.psi1) + self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0, ((1. - Iplus_Dprod_i) / self.Diag0)[:, None] * self.RPT0.T)) + self.R, info = linalg.lapack.dtrtrs(self.L, self.Lmi, lower=1) + self.RPT = np.dot(self.R, self.P.T) + self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T, self.RPT) self.w = self.Diag * self.likelihood.v_tilde - self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde)) - self.mu = self.w + np.dot(self.P,self.Gamma) + self.Gamma = np.dot(self.R.T, np.dot(self.RPT, self.likelihood.v_tilde)) + self.mu = self.w + np.dot(self.P, self.Gamma) # Remove extra term from dL_dpsi1 - self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N)) + self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1 * self.likelihood.precision.flatten().reshape(1,self.num_data)) #self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm) - #self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB + #self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.num_data)) #dB #########333333 - #self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) + # self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B) #########333333 @@ -125,16 +114,16 @@ class GeneralizedFITC(SparseGP): else: raise NotImplementedError, "homoscedastic fitc not implemented" # Remove extra term from dL_dpsi1 - #self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB + # self.dL_dpsi1 += -mdot(self.Kmmi,self.psi1*self.likelihood.precision) #dB sf = self.scale_factor - sf2 = sf**2 + sf2 = sf ** 2 # Remove extra term from dL_dKmm - self.dL_dKmm += 0.5 * self.input_dim * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB + self.dL_dKmm += 0.5 * self.input_dim * mdot(self.Lmi.T, self.A, self.Lmi) * sf2 # dB self.dL_dpsi0 = None - #the partial derivative vector for the likelihood + # the partial derivative vector for the likelihood if self.likelihood.Nparams == 0: self.partial_for_likelihood = None elif self.likelihood.is_heteroscedastic: @@ -142,7 +131,7 @@ class GeneralizedFITC(SparseGP): else: raise NotImplementedError, "homoscedastic derivatives not implemented" #likelihood is not heterscedatic - #self.partial_for_likelihood = - 0.5 * self.N*self.input_dim*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 + #self.partial_for_likelihood = - 0.5 * self.num_data*self.input_dim*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2 #self.partial_for_likelihood += 0.5 * self.input_dim * trace_dot(self.Bi,self.A)*self.likelihood.precision #self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1)) #TODO partial derivative vector for the likelihood not implemented @@ -151,28 +140,28 @@ class GeneralizedFITC(SparseGP): """ Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel """ - dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm,self.Z) + dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z) if self.has_uncertain_inputs: raise NotImplementedError, "heteroscedatic derivates not implemented" else: - #NOTE in SparseGP this would include the gradient wrt psi0 - dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1,self.Z,self.X) + # NOTE in SparseGP this would include the gradient wrt psi0 + dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.Z, self.X) return dL_dtheta def log_likelihood(self): """ Compute the (lower bound on the) log marginal likelihood """ - sf2 = self.scale_factor**2 + sf2 = self.scale_factor ** 2 if self.likelihood.is_heteroscedastic: - A = -0.5*self.N*self.input_dim*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) + A = -0.5*self.num_data*self.input_dim*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) else: - A = -0.5*self.N*self.input_dim*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT + A = -0.5*self.num_data*self.input_dim*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT C = -self.input_dim * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.num_inducing*np.log(sf2)) #C = -0.5*self.input_dim * (self.B_logdet + self.num_inducing*np.log(sf2)) D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V)) #self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T) #D_ = 0.5*np.trace(self.Cpsi1VVpsi1) - return A+C+D + return A + C + D def _raw_predict(self, Xnew, which_parts, full_cov=False): if self.likelihood.is_heteroscedastic: @@ -191,30 +180,30 @@ class GeneralizedFITC(SparseGP): # = I - [RPT0] * (U*U.T)^-1 * [RPT0].T # = I - V.T * V U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn) - V,info = linalg.flapack.dtrtrs(U,self.RPT0.T,lower=1) - C = np.eye(self.num_inducing) - np.dot(V.T,V) - mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:]) - #self.C = C - #self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T - #self.mu_u = mu_u - #self.U = U + V, info = linalg.flapack.dtrtrs(U, self.RPT0.T, lower=1) + C = np.eye(self.num_inducing) - np.dot(V.T, V) + mu_u = np.dot(C, self.RPT0) * (1. / self.Diag0[None, :]) + # self.C = C + # self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T + # self.mu_u = mu_u + # self.U = U # q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T) - mu_H = np.dot(mu_u,self.mu) + mu_H = np.dot(mu_u, self.mu) self.mu_H = mu_H - Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T)) + Sigma_H = C + np.dot(mu_u, np.dot(self.Sigma, mu_u.T)) # q(f_star|y) = N(f_star|mu_star,sigma2_star) Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts) - KR0T = np.dot(Kx.T,self.Lmi.T) - mu_star = np.dot(KR0T,mu_H) + KR0T = np.dot(Kx.T, self.Lmi.T) + mu_star = np.dot(KR0T, mu_H) if full_cov: - Kxx = self.kern.K(Xnew,which_parts=which_parts) - var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T)) + Kxx = self.kern.K(Xnew, which_parts=which_parts) + var = Kxx + np.dot(KR0T, np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T)) else: - Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts) - Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed? - var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T)) # TODO: RA, is this line needed? - var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T),0))[:,None] - return mu_star[:,None],var + Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) + Kxx_ = self.kern.K(Xnew, which_parts=which_parts) # TODO: RA, is this line needed? + var_ = Kxx_ + np.dot(KR0T, np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T)) # TODO: RA, is this line needed? + var = (Kxx + np.sum(KR0T.T * np.dot(Sigma_H - np.eye(self.num_inducing), KR0T.T), 0))[:, None] + return mu_star[:, None], var else: raise NotImplementedError, "homoscedastic fitc not implemented" """ diff --git a/GPy/models/gplvm.py b/GPy/models/gplvm.py index 5589304a..e602a59a 100644 --- a/GPy/models/gplvm.py +++ b/GPy/models/gplvm.py @@ -6,7 +6,7 @@ import numpy as np import pylab as pb import sys, pdb from .. import kern -from ..core import model +from ..core import Model from ..util.linalg import pdinv, PCA from ..core import GP from ..likelihoods import Gaussian @@ -42,13 +42,13 @@ class GPLVM(GP): return np.random.randn(Y.shape[0], input_dim) def _get_param_names(self): - return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.N)],[]) + GP._get_param_names(self) + return sum([['X_%i_%i'%(n,q) for q in range(self.input_dim)] for n in range(self.num_data)],[]) + GP._get_param_names(self) def _get_params(self): return np.hstack((self.X.flatten(), GP._get_params(self))) def _set_params(self,x): - self.X = x[:self.N*self.input_dim].reshape(self.N,self.input_dim).copy() + self.X = x[:self.num_data*self.input_dim].reshape(self.num_data,self.input_dim).copy() GP._set_params(self, x[self.X.size:]) def _log_likelihood_gradients(self): diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 4300f113..b078fd27 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -3,7 +3,7 @@ Created on 10 Apr 2013 @author: Max Zwiessele ''' -from GPy.core import model +from GPy.core import Model from GPy.core import SparseGP from GPy.util.linalg import PCA import numpy @@ -12,7 +12,7 @@ import pylab from GPy.kern.kern import kern from GPy.models.bayesian_gplvm import BayesianGPLVM -class MRD(model): +class MRD(Model): """ Do MRD on given Datasets in Ylist. All Ys in likelihood_list are in [N x Dn], where Dn can be different per Yn, @@ -33,7 +33,7 @@ class MRD(model): :param X_variance: Initial latent space variance :param init: [cooncat|single|random] - initialization method to use: + initialization method to use: *concat: PCA on concatenated outputs *single: PCA on each output *random: random @@ -44,7 +44,7 @@ class MRD(model): :param kernels: list of kernels or kernel shared for all BGPLVMS :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default) """ - def __init__(self, likelihood_or_Y_list, input_dim, M=10, names=None, + def __init__(self, likelihood_or_Y_list, input_dim, num_inducing=10, names=None, kernels=None, initx='PCA', initz='permute', _debug=False, **kw): if names is None: @@ -61,24 +61,24 @@ class MRD(model): assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.input_dim = input_dim - self.num_inducing = M + self.num_inducing = num_inducing self._debug = _debug self._init = True X = self._init_X(initx, likelihood_or_Y_list) Z = self._init_Z(initz, X) - self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, M=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] + self.bgplvms = [BayesianGPLVM(l, input_dim=input_dim, kernel=k, X=X, Z=Z, num_inducing=self.num_inducing, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] del self._init self.gref = self.bgplvms[0] nparams = numpy.array([0] + [SparseGP._get_params(g).size - g.Z.size for g in self.bgplvms]) self.nparams = nparams.cumsum() - self.N = self.gref.N - self.NQ = self.N * self.input_dim + self.num_data = self.gref.num_data + self.NQ = self.num_data * self.input_dim self.MQ = self.num_inducing * self.input_dim - model.__init__(self) # @UndefinedVariable + Model.__init__(self) # @UndefinedVariable self._set_params(self._get_params()) @property @@ -142,8 +142,8 @@ class MRD(model): self._init_Z(initz, self.X) def _get_param_names(self): - # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) - # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) + # X_names = sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + # S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) n1 = self.gref._get_param_names() n1var = n1[:self.NQ * 2 + self.MQ] map_names = lambda ns, name: map(lambda x: "{1}_{0}".format(*x), @@ -169,8 +169,8 @@ class MRD(model): return params # def _set_var_params(self, g, X, X_var, Z): -# g.X = X.reshape(self.N, self.input_dim) -# g.X_variance = X_var.reshape(self.N, self.input_dim) +# g.X = X.reshape(self.num_data, self.input_dim) +# g.X_variance = X_var.reshape(self.num_data, self.input_dim) # g.Z = Z.reshape(self.num_inducing, self.input_dim) # # def _set_kern_params(self, g, p): diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index f82de00f..9027ef07 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -26,7 +26,7 @@ class SparseGPClassification(SparseGP): """ - def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10): + def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10): if kernel is None: kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3) @@ -38,7 +38,7 @@ class SparseGPClassification(SparseGP): raise Warning, 'likelihood.data and Y are different.' if Z is None: - i = np.random.permutation(X.shape[0])[:M] + i = np.random.permutation(X.shape[0])[:num_inducing] Z = X[i].copy() else: assert Z.shape[1]==X.shape[1] diff --git a/GPy/models/sparse_gp_regression.py b/GPy/models/sparse_gp_regression.py index d7907c5f..432d6e18 100644 --- a/GPy/models/sparse_gp_regression.py +++ b/GPy/models/sparse_gp_regression.py @@ -26,14 +26,14 @@ class SparseGPRegression(SparseGP): """ - def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10, X_variance=None): + def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10, X_variance=None): # kern defaults to rbf (plus white for stability) if kernel is None: kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1], 1e-3) # Z defaults to a subset of the data if Z is None: - i = np.random.permutation(X.shape[0])[:M] + i = np.random.permutation(X.shape[0])[:num_inducing] Z = X[i].copy() else: assert Z.shape[1] == X.shape[1] diff --git a/GPy/models/sparse_gplvm.py b/GPy/models/sparse_gplvm.py index bec8c2da..76fe65f1 100644 --- a/GPy/models/sparse_gplvm.py +++ b/GPy/models/sparse_gplvm.py @@ -23,19 +23,19 @@ class SparseGPLVM(SparseGPRegression, GPLVM): :type init: 'PCA'|'random' """ - def __init__(self, Y, input_dim, kernel=None, init='PCA', M=10): + def __init__(self, Y, input_dim, kernel=None, init='PCA', num_inducing=10): X = self.initialise_latent(init, input_dim, Y) - SparseGPRegression.__init__(self, X, Y, kernel=kernel, M=M) + SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) def _get_param_names(self): - return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.N)], []) + return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) + SparseGPRegression._get_param_names(self)) def _get_params(self): return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self))) def _set_params(self, x): - self.X = x[:self.X.size].reshape(self.N, self.input_dim).copy() + self.X = x[:self.X.size].reshape(self.num_data, self.input_dim).copy() SparseGPRegression._set_params(self, x[self.X.size:]) def log_likelihood(self): diff --git a/GPy/testing/bgplvm_tests.py b/GPy/testing/bgplvm_tests.py index 3ca32660..ff558f6d 100644 --- a/GPy/testing/bgplvm_tests.py +++ b/GPy/testing/bgplvm_tests.py @@ -8,67 +8,67 @@ from GPy.models.bayesian_gplvm import BayesianGPLVM class BGPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_linear_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_bias_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.rbf(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) #@unittest.skip('psi2 cross terms are NotImplemented for this combination') def test_linear_bias_kern(self): - N, M, input_dim, D = 30, 5, 4, 30 + N, num_inducing, input_dim, D = 30, 5, 4, 30 X = np.random.rand(N, input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T Y -= Y.mean(axis=0) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = BayesianGPLVM(Y, input_dim, kernel=k, M=M) + m = BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/examples_tests.py b/GPy/testing/examples_tests.py index 51131fc3..14fa5593 100644 --- a/GPy/testing/examples_tests.py +++ b/GPy/testing/examples_tests.py @@ -12,31 +12,31 @@ from nose.tools import nottest import sys class ExamplesTests(unittest.TestCase): - def _checkgrad(self, model): - self.assertTrue(model.checkgrad()) + def _checkgrad(self, Model): + self.assertTrue(Model.checkgrad()) - def _model_instance(self, model): - self.assertTrue(isinstance(model, GPy.models)) + def _model_instance(self, Model): + self.assertTrue(isinstance(Model, GPy.models)) """ -def model_instance_generator(model): +def model_instance_generator(Model): def check_model_returned(self): - self._model_instance(model) + self._model_instance(Model) return check_model_returned -def checkgrads_generator(model): +def checkgrads_generator(Model): def model_checkgrads(self): - self._checkgrad(model) + self._checkgrad(Model) return model_checkgrads """ -def model_checkgrads(model): - model.randomize() - assert model.checkgrad() +def model_checkgrads(Model): + Model.randomize() + assert Model.checkgrad() -def model_instance(model): - assert isinstance(model, GPy.core.model) +def model_instance(Model): + assert isinstance(Model, GPy.core.Model) @nottest def test_models(): @@ -57,25 +57,25 @@ def test_models(): continue print "Testing example: ", example[0] - # Generate model - model = example[1]() - print model + # Generate Model + Model = example[1]() + print Model # Create tests for instance check """ - test = model_instance_generator(model) + test = model_instance_generator(Model) test.__name__ = 'test_instance_%s' % example[0] setattr(ExamplesTests, test.__name__, test) #Create tests for checkgrads check - test = checkgrads_generator(model) + test = checkgrads_generator(Model) test.__name__ = 'test_checkgrads_%s' % example[0] setattr(ExamplesTests, test.__name__, test) """ model_checkgrads.description = 'test_checkgrads_%s' % example[0] - yield model_checkgrads, model + yield model_checkgrads, Model model_instance.description = 'test_instance_%s' % example[0] - yield model_instance, model + yield model_instance, Model if __name__ == "__main__": print "Running unit tests, please be (very) patient..." diff --git a/GPy/testing/gplvm_tests.py b/GPy/testing/gplvm_tests.py index 4194698f..8c2ba9fc 100644 --- a/GPy/testing/gplvm_tests.py +++ b/GPy/testing/gplvm_tests.py @@ -7,7 +7,7 @@ import GPy class GPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) @@ -19,7 +19,7 @@ class GPLVMTests(unittest.TestCase): self.assertTrue(m.checkgrad()) def test_linear_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) @@ -31,7 +31,7 @@ class GPLVMTests(unittest.TestCase): self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index 2d97c82c..98c75827 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -21,7 +21,7 @@ class KernelTests(unittest.TestCase): """ X = np.random.rand(30, 4) K = np.dot(X, X.T) - kernel = GPy.kern.fixed(4, K) + kernel = GPy.kern.Fixed(4, K) Y = np.ones((30,1)) m = GPy.models.GPRegression(X,Y,kernel=kernel) self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/mrd_tests.py b/GPy/testing/mrd_tests.py index df4c2723..b0137709 100644 --- a/GPy/testing/mrd_tests.py +++ b/GPy/testing/mrd_tests.py @@ -14,7 +14,7 @@ class MRDTests(unittest.TestCase): def test_gradients(self): num_m = 3 - N, M, input_dim, D = 20, 8, 6, 20 + N, num_inducing, input_dim, D = 20, 8, 6, 20 X = np.random.rand(N, input_dim) k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) @@ -23,7 +23,7 @@ class MRDTests(unittest.TestCase): Ylist = [np.random.multivariate_normal(np.zeros(N), K, input_dim).T for _ in range(num_m)] likelihood_list = [GPy.likelihoods.Gaussian(Y) for Y in Ylist] - m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, M=M) + m = GPy.models.MRD(likelihood_list, input_dim=input_dim, kernels=k, num_inducing=num_inducing) m.ensure_default_constraints() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/psi_stat_gradient_tests.py b/GPy/testing/psi_stat_gradient_tests.py index 2257d16c..b3d75953 100644 --- a/GPy/testing/psi_stat_gradient_tests.py +++ b/GPy/testing/psi_stat_gradient_tests.py @@ -8,10 +8,10 @@ import numpy import GPy import itertools -from GPy.core import model +from GPy.core import Model -class PsiStatModel(model): - def __init__(self, which, X, X_variance, Z, M, kernel): +class PsiStatModel(Model): + def __init__(self, which, X, X_variance, Z, num_inducing, kernel): self.which = which self.X = X self.X_variance = X_variance @@ -64,8 +64,8 @@ class DPsiStatTest(unittest.TestCase): def testPsi0(self): for k in self.kernels: - m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.num_inducing, kernel=k) + m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, + num_inducing=self.num_inducing, kernel=k) try: assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts))) except: @@ -74,33 +74,33 @@ class DPsiStatTest(unittest.TestCase): # def testPsi1(self): # for k in self.kernels: # m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z, -# M=self.M, kernel=k) +# num_inducing=self.num_inducing, kernel=k) # assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_lin(self): k = self.kernels[0] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.num_inducing, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_lin_bia(self): k = self.kernels[3] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.num_inducing, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_rbf(self): k = self.kernels[1] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.num_inducing, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_rbf_bia(self): k = self.kernels[-1] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.num_inducing, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) def testPsi2_bia(self): k = self.kernels[2] m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z, - M=self.num_inducing, kernel=k) + num_inducing=self.num_inducing, kernel=k) assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts))) @@ -122,11 +122,11 @@ if __name__ == "__main__": numpy.random.seed(0) input_dim = 5 N = 50 - M = 10 + num_inducing = 10 D = 15 X = numpy.random.randn(N, input_dim) X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1) - Z = numpy.random.permutation(X)[:M] + Z = numpy.random.permutation(X)[:num_inducing] Y = X.dot(numpy.random.randn(input_dim, D)) # kernel = GPy.kern.bias(input_dim) # @@ -148,7 +148,7 @@ if __name__ == "__main__": # m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, # num_inducing=num_inducing, kernel=GPy.kern.rbf(input_dim)) m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, - M=M, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) + num_inducing=num_inducing, kernel=GPy.kern.linear(input_dim, ARD=True, variances=numpy.random.rand(input_dim))) m3.ensure_default_constraints() # + GPy.kern.bias(input_dim)) # m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, diff --git a/GPy/testing/sparse_gplvm_tests.py b/GPy/testing/sparse_gplvm_tests.py index 38f95730..e7f714b4 100644 --- a/GPy/testing/sparse_gplvm_tests.py +++ b/GPy/testing/sparse_gplvm_tests.py @@ -8,38 +8,38 @@ from GPy.models.sparse_gplvm import SparseGPLVM class sparse_GPLVMTests(unittest.TestCase): def test_bias_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = SparseGPLVM(Y, input_dim, kernel=k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) @unittest.skip('linear kernels do not have dKdiag_dX') def test_linear_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.linear(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = SparseGPLVM(Y, input_dim, kernel=k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) def test_rbf_kern(self): - N, M, input_dim, D = 10, 3, 2, 4 + N, num_inducing, input_dim, D = 10, 3, 2, 4 X = np.random.rand(N, input_dim) k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) K = k.K(X) Y = np.random.multivariate_normal(np.zeros(N),K,input_dim).T k = GPy.kern.rbf(input_dim) + GPy.kern.white(input_dim, 0.00001) - m = SparseGPLVM(Y, input_dim, kernel=k, M=M) + m = SparseGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing) m.ensure_default_constraints() m.randomize() self.assertTrue(m.checkgrad()) diff --git a/GPy/testing/unit_tests.py b/GPy/testing/unit_tests.py index 020f3890..7ee9ef40 100644 --- a/GPy/testing/unit_tests.py +++ b/GPy/testing/unit_tests.py @@ -196,9 +196,9 @@ class GradientTests(unittest.TestCase): k = GPy.kern.rbf(1) + GPy.kern.white(1) Y = np.hstack([np.ones(N/2),-np.ones(N/2)])[:,None] - distribution = GPy.likelihoods.likelihood_functions.binomial() + distribution = GPy.likelihoods.likelihood_functions.Binomial() likelihood = GPy.likelihoods.EP(Y, distribution) - #likelihood = GPy.inference.likelihoods.binomial(Y) + #likelihood = GPy.inference.likelihoods.Binomial(Y) m = GPy.models.generalized_FITC(X,likelihood,k,inducing=4) m.constrain_positive('(var|len)') m.approximate_likelihood() diff --git a/GPy/util/plot_latent.py b/GPy/util/plot_latent.py index c09b5c93..e147d840 100644 --- a/GPy/util/plot_latent.py +++ b/GPy/util/plot_latent.py @@ -2,9 +2,9 @@ import pylab as pb import numpy as np from .. import util -def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40): +def plot_latent(Model, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40): """ - :param labels: a np.array of size model.N containing labels for the points (can be number, strings, etc) + :param labels: a np.array of size Model.N containing labels for the points (can be number, strings, etc) :param resolution: the resolution of the grid on which to evaluate the predictive variance """ if ax is None: @@ -12,26 +12,26 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, util.plot.Tango.reset() if labels is None: - labels = np.ones(model.N) + labels = np.ones(Model.N) if which_indices is None: - if model.input_dim==1: + if Model.input_dim==1: input_1 = 0 input_2 = None - if model.input_dim==2: + if Model.input_dim==2: input_1, input_2 = 0,1 else: try: - input_1, input_2 = np.argsort(model.input_sensitivity())[:2] + input_1, input_2 = np.argsort(Model.input_sensitivity())[:2] except: raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" else: input_1, input_2 = which_indices #first, plot the output variance as a function of the latent space - Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(model.X[:,[input_1, input_2]],resolution=resolution) - Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) + Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(Model.X[:,[input_1, input_2]],resolution=resolution) + Xtest_full = np.zeros((Xtest.shape[0], Model.X.shape[1])) Xtest_full[:, :2] = Xtest - mu, var, low, up = model.predict(Xtest_full) + mu, var, low, up = Model.predict(Xtest_full) var = var[:, :1] ax.imshow(var.reshape(resolution, resolution).T, extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower') @@ -55,12 +55,12 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, m = marker index = np.nonzero(labels==ul)[0] - if model.input_dim==1: - x = model.X[index,input_1] + if Model.input_dim==1: + x = Model.X[index,input_1] y = np.zeros(index.size) else: - x = model.X[index,input_1] - y = model.X[index,input_2] + x = Model.X[index,input_1] + y = Model.X[index,input_2] ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), label=this_label) ax.set_xlabel('latent dimension %i'%input_1) @@ -76,16 +76,16 @@ def plot_latent(model, labels=None, which_indices=None, resolution=50, ax=None, return ax -def plot_latent_indices(model, which_indices=None, *args, **kwargs): +def plot_latent_indices(Model, which_indices=None, *args, **kwargs): if which_indices is None: try: - input_1, input_2 = np.argsort(model.input_sensitivity())[:2] + input_1, input_2 = np.argsort(Model.input_sensitivity())[:2] except: raise ValueError, "cannot Automatically determine which dimensions to plot, please pass 'which_indices'" else: input_1, input_2 = which_indices - ax = plot_latent(model, which_indices=[input_1, input_2], *args, **kwargs) + ax = plot_latent(Model, which_indices=[input_1, input_2], *args, **kwargs) # TODO: Here test if there are inducing points... - ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w') + ax.plot(Model.Z[:, input_1], Model.Z[:, input_2], '^w') return ax \ No newline at end of file diff --git a/GPy/util/visualize.py b/GPy/util/visualize.py index a3c10b97..66322c15 100644 --- a/GPy/util/visualize.py +++ b/GPy/util/visualize.py @@ -43,16 +43,16 @@ class vector_show(data_show): class lvm(data_show): - def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]): - """Visualize a latent variable model + def __init__(self, vals, Model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]): + """Visualize a latent variable Model - :param model: the latent variable model to visualize. + :param Model: the latent variable Model to visualize. :param data_visualize: the object used to visualize the data which has been modelled. :type data_visualize: visualize.data_show type. :param latent_axes: the axes where the latent visualization should be plotted. """ if vals == None: - vals = model.X[0] + vals = Model.X[0] data_show.__init__(self, vals, axes=latent_axes) @@ -68,13 +68,13 @@ class lvm(data_show): self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_enter_event', self.on_enter) self.data_visualize = data_visualize - self.model = model + self.Model = Model self.latent_axes = latent_axes self.sense_axes = sense_axes self.called = False self.move_on = False self.latent_index = latent_index - self.latent_dim = model.input_dim + self.latent_dim = Model.input_dim # The red cross which shows current latent point. self.latent_values = vals @@ -85,7 +85,7 @@ class lvm(data_show): def modify(self, vals): """When latent values are modified update the latent representation and ulso update the output visualization.""" self.vals = vals.copy() - y = self.model.predict(self.vals)[0] + y = self.Model.predict(self.vals)[0] self.data_visualize.modify(y) self.latent_handle.set_data(self.vals[self.latent_index[0]], self.vals[self.latent_index[1]]) self.axes.figure.canvas.draw() @@ -113,15 +113,15 @@ class lvm(data_show): # A click in the bar chart axis for selection a dimension. if self.sense_axes != None: self.sense_axes.cla() - self.sense_axes.bar(np.arange(self.model.input_dim),1./self.model.input_sensitivity(),color='b') + self.sense_axes.bar(np.arange(self.Model.input_dim),1./self.Model.input_sensitivity(),color='b') if self.latent_index[1] == self.latent_index[0]: - self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='y') - self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='y') + self.sense_axes.bar(np.array(self.latent_index[0]),1./self.Model.input_sensitivity()[self.latent_index[0]],color='y') + self.sense_axes.bar(np.array(self.latent_index[1]),1./self.Model.input_sensitivity()[self.latent_index[1]],color='y') else: - self.sense_axes.bar(np.array(self.latent_index[0]),1./self.model.input_sensitivity()[self.latent_index[0]],color='g') - self.sense_axes.bar(np.array(self.latent_index[1]),1./self.model.input_sensitivity()[self.latent_index[1]],color='r') + self.sense_axes.bar(np.array(self.latent_index[0]),1./self.Model.input_sensitivity()[self.latent_index[0]],color='g') + self.sense_axes.bar(np.array(self.latent_index[1]),1./self.Model.input_sensitivity()[self.latent_index[1]],color='r') self.sense_axes.figure.canvas.draw() @@ -131,21 +131,21 @@ class lvm_subplots(lvm): latent_axes is a np array of dimension np.ceil(input_dim/2), one for each pair of the latent dimensions. """ - def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None): - self.nplots = int(np.ceil(model.input_dim/2.))+1 + def __init__(self, vals, Model, data_visualize, latent_axes=None, sense_axes=None): + self.nplots = int(np.ceil(Model.input_dim/2.))+1 assert len(latent_axes)==self.nplots if vals==None: - vals = model.X[0, :] + vals = Model.X[0, :] self.latent_values = vals for i, axis in enumerate(latent_axes): if i == self.nplots-1: - if self.nplots*2!=model.input_dim: + if self.nplots*2!=Model.input_dim: latent_index = [i*2, i*2] - lvm.__init__(self, self.latent_vals, model, data_visualize, axis, sense_axes, latent_index=latent_index) + lvm.__init__(self, self.latent_vals, Model, data_visualize, axis, sense_axes, latent_index=latent_index) else: latent_index = [i*2, i*2+1] - lvm.__init__(self, self.latent_vals, model, data_visualize, axis, latent_index=latent_index) + lvm.__init__(self, self.latent_vals, Model, data_visualize, axis, latent_index=latent_index) @@ -158,7 +158,7 @@ class lvm_dimselect(lvm): GPy.examples.dimensionality_reduction.BGPVLM_oil() """ - def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]): + def __init__(self, vals, Model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]): if latent_axes==None and sense_axes==None: self.fig,(latent_axes,self.sense_axes) = plt.subplots(1,2) elif sense_axes==None: @@ -167,14 +167,14 @@ class lvm_dimselect(lvm): else: self.sense_axes = sense_axes - lvm.__init__(self,vals,model,data_visualize,latent_axes,sense_axes,latent_index) + lvm.__init__(self,vals,Model,data_visualize,latent_axes,sense_axes,latent_index) print "use left and right mouse butons to select dimensions" def on_click(self, event): if event.inaxes==self.sense_axes: - new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.input_dim-1)) + new_index = max(0,min(int(np.round(event.xdata-0.5)),self.Model.input_dim-1)) if event.button == 1: # Make it red if and y-axis (red=port=left) if it is a left button click self.latent_index[1] = new_index @@ -185,7 +185,7 @@ class lvm_dimselect(lvm): self.show_sensitivities() self.latent_axes.cla() - self.model.plot_latent(which_indices=self.latent_index, + self.Model.plot_latent(which_indices=self.latent_index, ax=self.latent_axes) self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0] self.modify(self.latent_values) @@ -199,7 +199,7 @@ class lvm_dimselect(lvm): def on_leave(self,event): latent_values = self.latent_values.copy() - y = self.model.predict(latent_values[None,:])[0] + y = self.Model.predict(latent_values[None,:])[0] self.data_visualize.modify(y)