diff --git a/AUTHORS.txt b/AUTHORS.txt index fc9e0fb7..f81db5ec 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -3,4 +3,5 @@ Nicolo Fusi Ricardo Andrade Nicolas Durrande Alan Saul +Max Zwiessele Neil D. Lawrence diff --git a/GPy/core/model.py b/GPy/core/model.py index f2b188d9..5dc6b254 100644 --- a/GPy/core/model.py +++ b/GPy/core/model.py @@ -447,7 +447,7 @@ class model(parameterised): assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods" ll_change = epsilon + 1. iteration = 0 - last_ll = -np.exp(1000) + last_ll = -np.inf convergence = False alpha = 0 diff --git a/GPy/core/transformations.py b/GPy/core/transformations.py index fcbfb548..2dbe33af 100644 --- a/GPy/core/transformations.py +++ b/GPy/core/transformations.py @@ -39,8 +39,8 @@ class logexp(transformation): return '(+ve)' class logexp_clipped(transformation): - max_bound = 1e250 - min_bound = 1e-9 + max_bound = 1e100 + min_bound = 1e-10 log_max_bound = np.log(max_bound) log_min_bound = np.log(min_bound) def __init__(self, lower=1e-6): @@ -49,15 +49,15 @@ class logexp_clipped(transformation): def f(self, x): exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound)) f = np.log(1. + exp) - if np.isnan(f).any(): - import ipdb;ipdb.set_trace() - return f +# if np.isnan(f).any(): +# import ipdb;ipdb.set_trace() + return np.clip(f, self.min_bound, self.max_bound) def finv(self, f): - return np.log(np.exp(np.clip(f, self.min_bound, self.max_bound)) - 1.) + return np.log(np.exp(f - 1.)) def gradfactor(self, f): ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound)) gf = (ef - 1.) / ef - return np.where(f < self.lower, 0, gf) + return gf # np.where(f < self.lower, 0, gf) def initialize(self, f): if np.any(f < 0.): print "Warning: changing parameters to satisfy constraints" diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 1f30a67f..029d812d 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -266,15 +266,19 @@ def bgplvm_simulation(optimize='scg', if optimize: print "Optimizing model:" - m.optimize('scg', max_iters=max_f_eval, max_f_eval=max_f_eval, messages=True) + m.optimize('bfgs', max_iters=max_f_eval, + max_f_eval=max_f_eval, + messages=True, gtol=1e-2) if plot: import pylab m.plot_X_1d() - pylab.figure(); pylab.axis(); m.kern.plot_ARD() + pylab.figure('BGPLVM Simulation ARD Parameters'); + pylab.axis(); + m.kern.plot_ARD() return m def mrd_simulation(optimize=True, plot_sim=False, **kw): - D1, D2, D3, N, M, Q = 150, 250, 30, 200, 3, 7 + D1, D2, D3, N, M, Q = 150, 200, 400, 300, 3, 7 slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim) from GPy.models import mrd @@ -283,21 +287,21 @@ def mrd_simulation(optimize=True, plot_sim=False, **kw): reload(mrd); reload(kern) - k = kern.linear(Q, [0.01] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) - m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', **kw) + k = kern.linear(Q, [0.05] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) + m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="concat", initz='permute', **kw) for i, Y in enumerate(Ylist): m['{}_noise'.format(i + 1)] = Y.var() / 100. - # m.constrain('variance|noise', logexp_clipped()) + # m.constrain('variance|noise', logexp_clipped(1e-6)) m.ensure_default_constraints() # DEBUG - np.seterr("raise") + # np.seterr("raise") if optimize: print "Optimizing Model:" - m.optimize('scg', messages=1, max_iters=3e3) + m.optimize('bfgs', messages=1, max_iters=3e3) return m @@ -305,12 +309,13 @@ def brendan_faces(): from GPy import kern data = GPy.util.datasets.brendan_faces() Q = 2 - # Y = data['Y'][0:-1:2, :] - Y = data['Y'] + Y = data['Y'][0:-1:10, :] + # Y = data['Y'] Yn = Y - Y.mean() Yn /= Yn.std() - m = GPy.models.GPLVM(Yn, Q)#, M=Y.shape[0]/4) + m = GPy.models.GPLVM(Yn, Q) + # m = GPy.models.Bayesian_GPLVM(Yn, Q, M=100) # optimize m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped()) @@ -318,7 +323,7 @@ def brendan_faces(): m.ensure_default_constraints() m.optimize('scg', messages=1, max_f_eval=10000) - ax = m.plot_latent() + ax = m.plot_latent(which_indices=(0,1)) y = m.likelihood.Y[0, :] data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False) lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) diff --git a/GPy/inference/SCG.py b/GPy/inference/SCG.py index d0a30f0d..fc8ce21a 100644 --- a/GPy/inference/SCG.py +++ b/GPy/inference/SCG.py @@ -25,6 +25,13 @@ import numpy as np import sys + +def print_out(len_maxiters, display, fnow, current_grad, beta, iteration): + if display: + print '\r', + print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', + sys.stdout.flush() + def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=None, ftol=None, gtol=None): """ Optimisation through Scaled Conjugate Gradients (SCG) @@ -64,8 +71,9 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto iteration = 0 + len_maxiters = len(str(maxiters)) if display: - print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len(str(maxiters))) + print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters) # Main optimization loop. while iteration < maxiters: @@ -97,7 +105,8 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto if function_eval >= max_f_eval: status = "Maximum number of function evaluations exceeded" - return x, flog, function_eval, status + break +# return x, flog, function_eval, status Delta = 2.*(fnew - fold) / (alpha * mu) if Delta >= 0.: @@ -113,17 +122,14 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto flog.append(fnow) # Current function value iteration += 1 - if display: - print '\r', - print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len(str(maxiters))), - # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', - sys.stdout.flush() + print_out(len_maxiters, display, fnow, current_grad, beta, iteration) if success: # Test for termination if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol): status = 'converged' - return x, flog, function_eval, status + break +# return x, flog, function_eval, status else: # Update variables for new position @@ -158,5 +164,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto status = "maxiter exceeded" if display: + print_out(len_maxiters, display, fnow, current_grad, beta, iteration) print "" return x, flog, function_eval, status diff --git a/GPy/kern/rbf.py b/GPy/kern/rbf.py index c413b469..8b644241 100644 --- a/GPy/kern/rbf.py +++ b/GPy/kern/rbf.py @@ -56,6 +56,13 @@ class rbf(kernpart): self._Z, self._mu, self._S = np.empty(shape=(3,1)) self._X, self._X2, self._params = np.empty(shape=(3,1)) + #a set of optional args to pass to weave + self.weave_options = {'headers' : [''], + 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], + 'extra_link_args' : ['-lgomp']} + + + def _get_params(self): return np.hstack((self.variance,self.lengthscale)) @@ -85,8 +92,43 @@ class rbf(kernpart): self._K_computations(X,X2) target[0] += np.sum(self._K_dvar*dL_dK) if self.ARD: - if X2 is None: X2 = X - [np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)] + dvardLdK = self._K_dvar*dL_dK + var_len3 = self.variance/np.power(self.lengthscale,3) + if X2 is None: + #save computation for the symmetrical case + dvardLdK += dvardLdK.T + code = """ + int q,i,j; + double tmp; + for(q=0; q'], - 'extra_compile_args': ['-fopenmp -O3'], #-march=native'], - 'extra_link_args' : ['-lgomp']} - N,Q = mu.shape M = Zhat.shape[0] @@ -288,6 +326,6 @@ class rbf(kernpart): """ weave.inline(code, support_code=support_code, libraries=['gomp'], arg_names=['N','M','Q','mu','Zhat','mudist_sq','mudist','lengthscale2','_psi2_denom','psi2_Zdist_sq','psi2_exponent','half_log_psi2_denom','psi2','variance_sq'], - type_converters=weave.converters.blitz,**weave_options) + type_converters=weave.converters.blitz,**self.weave_options) return mudist,mudist_sq, psi2_exponent, psi2 diff --git a/GPy/likelihoods/EP.py b/GPy/likelihoods/EP.py index 4e1e0c5b..efca0649 100644 --- a/GPy/likelihoods/EP.py +++ b/GPy/likelihoods/EP.py @@ -319,7 +319,8 @@ class EP(likelihood): Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde) Diag = Diag0 * Iplus_Dprod_i P = Iplus_Dprod_i[:,None] * P0 - L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T)) + safe_diag = np.where(Diag0 < self.tau_tilde, self.tau_tilde/(1.+Diag0*self.tau_tilde), (1. - Iplus_Dprod_i)/Diag0) + L = jitchol(np.eye(M) + np.dot(RPT0,safe_diag[:,None]*RPT0.T)) R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1) RPT = np.dot(R,P.T) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) diff --git a/GPy/likelihoods/Gaussian.py b/GPy/likelihoods/Gaussian.py index 98227b16..e08fee90 100644 --- a/GPy/likelihoods/Gaussian.py +++ b/GPy/likelihoods/Gaussian.py @@ -53,7 +53,7 @@ class Gaussian(likelihood): def _set_params(self, x): x = float(x) if self._variance != x: - self.precision = 1. / max(x, 1e-6) + self.precision = 1. / x self.covariance_matrix = np.eye(self.N) * x self.V = (self.precision) * self.Y self._variance = x diff --git a/GPy/models/Bayesian_GPLVM.py b/GPy/models/Bayesian_GPLVM.py index e1e99af9..5f22526b 100644 --- a/GPy/models/Bayesian_GPLVM.py +++ b/GPy/models/Bayesian_GPLVM.py @@ -14,6 +14,7 @@ import itertools from matplotlib.colors import colorConverter from matplotlib.figure import SubplotParams from GPy.inference.optimization import SCG +from GPy.util import plot_latent class Bayesian_GPLVM(sparse_GP, GPLVM): """ @@ -93,8 +94,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self))) return x + def _clipped(self, x): + return x # np.clip(x, -1e300, 1e300) + def _set_params(self, x, save_old=True, save_count=0): # try: + x = self._clipped(x) N, Q = self.N, self.Q self.X = x[:self.X.size].reshape(N, Q).copy() self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy() @@ -176,20 +181,10 @@ class Bayesian_GPLVM(sparse_GP, GPLVM): # ======================== self.dbound_dmuS = np.hstack((d_dmu, d_dS)) self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self) - return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta)) + return self._clipped(np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))) - def plot_latent(self, which_indices=None, *args, **kwargs): - - if which_indices is None: - try: - input_1, input_2 = np.argsort(self.input_sensitivity())[:2] - except: - raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" - else: - input_1, input_2 = which_indices - ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs) - ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w') - return ax + def plot_latent(self, *args, **kwargs): + plot_latent.plot_latent_indices(self, *args, **kwargs) def do_test_latents(self, Y): """ diff --git a/GPy/models/GP.py b/GPy/models/GP.py index 8f7ef9d3..e6c4c1d6 100644 --- a/GPy/models/GP.py +++ b/GPy/models/GP.py @@ -173,7 +173,7 @@ class GP(model): """ # normalize X values Xnew = (Xnew.copy() - self._Xmean) / self._Xstd - mu, var = self._raw_predict(Xnew, which_parts=which_parts, full_cov=full_cov) + mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts) # now push through likelihood mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov) diff --git a/GPy/models/GPLVM.py b/GPy/models/GPLVM.py index ff2be732..7445d0ab 100644 --- a/GPy/models/GPLVM.py +++ b/GPy/models/GPLVM.py @@ -11,6 +11,8 @@ from ..util.linalg import pdinv, PCA from GP import GP from ..likelihoods import Gaussian from .. import util +from GPy.util import plot_latent + class GPLVM(GP): """ @@ -60,75 +62,5 @@ class GPLVM(GP): mu, var, upper, lower = self.predict(Xnew) pb.plot(mu[:,0], mu[:,1],'k',linewidth=1.5) - def plot_latent(self, labels=None, which_indices=None, resolution=50, ax=None, marker='o', s=40): - """ - :param labels: a np.array of size self.N containing labels for the points (can be number, strings, etc) - :param resolution: the resolution of the grid on which to evaluate the predictive variance - """ - if ax is None: - ax = pb.gca() - util.plot.Tango.reset() - - if labels is None: - labels = np.ones(self.N) - if which_indices is None: - if self.Q==1: - input_1 = 0 - input_2 = None - if self.Q==2: - input_1, input_2 = 0,1 - else: - try: - input_1, input_2 = np.argsort(self.input_sensitivity())[:2] - except: - raise ValueError, "cannot Atomatically determine which dimensions to plot, please pass 'which_indices'" - else: - input_1, input_2 = which_indices - - #first, plot the output variance as a function of the latent space - Xtest, xx,yy,xmin,xmax = util.plot.x_frame2D(self.X[:,[input_1, input_2]],resolution=resolution) - Xtest_full = np.zeros((Xtest.shape[0], self.X.shape[1])) - Xtest_full[:, :2] = Xtest - mu, var, low, up = self.predict(Xtest_full) - var = var[:, :1] - ax.imshow(var.reshape(resolution, resolution).T, - extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower') - - # make sure labels are in order of input: - ulabels = [] - for lab in labels: - if not lab in ulabels: - ulabels.append(lab) - - for i, ul in enumerate(ulabels): - if type(ul) is np.string_: - this_label = ul - elif type(ul) is np.int64: - this_label = 'class %i'%ul - else: - this_label = 'class %i'%i - if len(marker) == len(ulabels): - m = marker[i] - else: - m = marker - - index = np.nonzero(labels==ul)[0] - if self.Q==1: - x = self.X[index,input_1] - y = np.zeros(index.size) - else: - x = self.X[index,input_1] - y = self.X[index,input_2] - ax.scatter(x, y, marker=m, s=s, color=util.plot.Tango.nextMedium(), mew=1.3, label=this_label) - - ax.set_xlabel('latent dimension %i'%input_1) - ax.set_ylabel('latent dimension %i'%input_2) - - if not np.all(labels==1.): - ax.legend(loc=0,numpoints=1) - - ax.set_xlim(xmin[0],xmax[0]) - ax.set_ylim(xmin[1],xmax[1]) - ax.grid(b=False) # remove the grid if present, it doesn't look good - ax.set_aspect('auto') # set a nice aspect ratio - return ax + def plot_latent(self, *args, **kwargs): + util.plot_latent.plot_latent(self, *args, **kwargs) \ No newline at end of file diff --git a/GPy/models/mrd.py b/GPy/models/mrd.py index 74f526d0..89b5d730 100644 --- a/GPy/models/mrd.py +++ b/GPy/models/mrd.py @@ -11,6 +11,7 @@ from scipy import linalg import numpy import itertools import pylab +from GPy.kern.kern import kern class MRD(model): """ @@ -19,7 +20,7 @@ class MRD(model): N must be shared across datasets though. :param likelihood_list...: likelihoods of observed datasets - :type likelihood_list: [GPy.likelihood] + :type likelihood_list: [GPy.likelihood] | [Y1..Yy] :param names: names for different gplvm models :type names: [str] :param Q: latent dimensionality (will raise @@ -38,85 +39,33 @@ class MRD(model): number of inducing inputs to use :param Z: initial inducing inputs - :param kernel: - kernel to use + :param kernels: list of kernels or kernel shared for all BGPLVMS + :type kernels: [GPy.kern.kern] | GPy.kern.kern | None (default) """ - def __init__(self,likelihood_list,Q,M=10,names=None,kernels=None,initX='PCA',initz='permute',_debug=False, **kwargs): + def __init__(self, likelihood_or_Y_list, Q, M=10, names=None, + kernels=None, initx='PCA', + initz='permute', _debug=False, **kw): if names is None: - self.names = ["{}".format(i + 1) for i in range(len(likelihood_list))] + self.names = ["{}".format(i + 1) for i in range(len(likelihood_or_Y_list))] - #sort out the kernels + # sort out the kernels if kernels is None: - kernels = [None]*len(likelihood_list) - elif isinstance(kernels,kern.kern): - kernels = [kernels.copy() for i in range(len(likelihood_list))] + kernels = [None] * len(likelihood_or_Y_list) + elif isinstance(kernels, kern): + kernels = [kernels.copy() for i in range(len(likelihood_or_Y_list))] else: - assert len(kernels)==len(likelihood_list), "need one kernel per output" - assert all([isinstance(k, kern.kern) for k in kernels]), "invalid kernel object detected!" + assert len(kernels) == len(likelihood_or_Y_list), "need one kernel per output" + assert all([isinstance(k, kern) for k in kernels]), "invalid kernel object detected!" + assert not ('kernel' in kw), "pass kernels through `kernels` argument" self.Q = Q self.M = M - self.N = self.gref.N - self.NQ = self.N * self.Q - self.MQ = self.M * self.Q + self._debug = _debug self._init = True - X = self._init_X(initx, likelihood_list) + X = self._init_X(initx, likelihood_or_Y_list) Z = self._init_Z(initz, X) - self.bgplvms = [Bayesian_GPLVM(l, k, X=X, Z=Z, M=self.M, **kwargs) for l,k in zip(likelihood_list,kernels)] - - del self._init - - self.gref = self.bgplvms[0] - nparams = numpy.array([0] + [sparse_GP._get_params(g).size - g.Z.size for g in self.bgplvms]) - self.nparams = nparams.cumsum() - - model.__init__(self) # @UndefinedVariable - - - - def __init__(self, *likelihood_list, **kwargs): - if kwargs.has_key("_debug"): - self._debug = kwargs['_debug'] - del kwargs['_debug'] - else: - self._debug = False - if kwargs.has_key("names"): - self.names = kwargs['names'] - del kwargs['names'] - else: - self.names = ["{}".format(i + 1) for i in range(len(likelihood_list))] - if kwargs.has_key('kernel'): - kernel = kwargs['kernel'] - k = lambda: kernel.copy() - del kwargs['kernel'] - else: - k = lambda: None - if kwargs.has_key('initx'): - initx = kwargs['initx'] - del kwargs['initx'] - else: - initx = "PCA" - if kwargs.has_key('initz'): - initz = kwargs['initz'] - del kwargs['initz'] - else: - initz = "permute" - try: - self.Q = kwargs["Q"] - except KeyError: - raise ValueError("Need Q for MRD") - try: - self.M = kwargs["M"] - del kwargs["M"] - except KeyError: - self.M = 10 - - self._init = True - X = self._init_X(initx, likelihood_list) - Z = self._init_Z(initz, X) - self.bgplvms = [Bayesian_GPLVM(Y, kernel=k(), X=X, Z=Z, M=self.M, **kwargs) for Y in likelihood_list] - + self.bgplvms = [Bayesian_GPLVM(l, Q=Q, kernel=k, X=X, Z=Z, M=self.M, **kw) for l, k in zip(likelihood_or_Y_list, kernels)] del self._init self.gref = self.bgplvms[0] @@ -212,13 +161,6 @@ class MRD(model): X = self.gref.X.ravel() X_var = self.gref.X_variance.ravel() Z = self.gref.Z.ravel() - - if self._debug: - for g in self.bgplvms: - assert numpy.allclose(g.X.ravel(), X) - assert numpy.allclose(g.X_variance.ravel(), X_var) - assert numpy.allclose(g.Z.ravel(), Z) - thetas = [sparse_GP._get_params(g)[g.Z.size:] for g in self.bgplvms] params = numpy.hstack([X, X_var, Z, numpy.hstack(thetas)]) return params @@ -241,12 +183,6 @@ class MRD(model): Z = x[start:end] thetas = x[end:] - if self._debug: - for g in self.bgplvms: - assert numpy.allclose(g.X, self.gref.X) - assert numpy.allclose(g.X_variance, self.gref.X_variance) - assert numpy.allclose(g.Z, self.gref.Z) - # set params for all: for g, s, e in itertools.izip(self.bgplvms, self.nparams, self.nparams[1:]): g._set_params(numpy.hstack([X, X_var, Z, thetas[s:e]])) @@ -259,7 +195,7 @@ class MRD(model): # g._computations() - def update_likelihood_approximation(self):#TODO: object oriented vs script base + def update_likelihood_approximation(self): # TODO: object oriented vs script base for bgplvm in self.bgplvms: bgplvm.update_likelihood_approximation() @@ -287,15 +223,21 @@ class MRD(model): def _init_X(self, init='PCA', likelihood_list=None): if likelihood_list is None: likelihood_list = self.likelihood_list - if init in "PCA_single": - X = numpy.zeros((likelihood_list[0].Y.shape[0], self.Q)) - for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.Q), len(likelihood_list)), likelihood_list): - X[:, qs] = PCA(Y.Y, len(qs))[0] - elif init in "PCA_concat": - X = PCA(numpy.hstack([l.Y for l in likelihood_list]), self.Q)[0] - #X = PCA(numpy.hstack(likelihood_list), self.Q)[0] + Ylist = [] + for likelihood_or_Y in likelihood_list: + if type(likelihood_or_Y) is numpy.ndarray: + Ylist.append(likelihood_or_Y) + else: + Ylist.append(likelihood_or_Y.Y) + del likelihood_list + if init in "PCA_concat": + X = PCA(numpy.hstack(Ylist), self.Q)[0] + elif init in "PCA_single": + X = numpy.zeros((Ylist[0].shape[0], self.Q)) + for qs, Y in itertools.izip(numpy.array_split(numpy.arange(self.Q), len(Ylist)), Ylist): + X[:, qs] = PCA(Y, len(qs))[0] else: # init == 'random': - X = numpy.random.randn(likelihood_list[0].Y.shape[0], self.Q) + X = numpy.random.randn(Ylist[0].shape[0], self.Q) self.X = X return X @@ -334,7 +276,7 @@ class MRD(model): return fig def plot_predict(self, fig_num="MRD Predictions", axes=None, **kwargs): - fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0],**kwargs)) + fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0], **kwargs)) return fig def plot_scales(self, fig_num="MRD Scales", axes=None, *args, **kwargs): diff --git a/GPy/models/sparse_GP.py b/GPy/models/sparse_GP.py index fe9d9dc4..fad18dc6 100644 --- a/GPy/models/sparse_GP.py +++ b/GPy/models/sparse_GP.py @@ -3,7 +3,7 @@ import numpy as np import pylab as pb -from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides,chol_inv +from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv from ..util.plot import gpplot from .. import kern from GP import GP @@ -149,9 +149,9 @@ class sparse_GP(GP): else: A = -0.5 * self.N * self.D * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT B = -0.5 * self.D * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self.A)) - C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) + C = -self.D * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.M * np.log(sf2)) D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V)) - return A + B + C + D + return A + B + C + D + self.likelihood.Z def _set_params(self, p): self.Z = p[:self.M * self.Q].reshape(self.M, self.Q) @@ -173,19 +173,19 @@ class sparse_GP(GP): For a Gaussian likelihood, no iteration is required: this function does nothing """ - if not isinstance(self.likelihood,Gaussian): #Updates not needed for Gaussian likelihood - self.likelihood.restart() #TODO check consistency with pseudo_EP + if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood + self.likelihood.restart() # TODO check consistency with pseudo_EP if self.has_uncertain_inputs: Lmi = chol_inv(self.Lm) Kmmi = tdot(Lmi.T) - diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2,Kmmi)]) + diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)]) - self.likelihood.fit_FITC(self.Kmm,self.psi1,diag_tr_psi2Kmmi) #This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion - #raise NotImplementedError, "EP approximation not implemented for uncertain inputs" + self.likelihood.fit_FITC(self.Kmm, self.psi1, diag_tr_psi2Kmmi) # This uses the fit_FITC code, but does not perfomr a FITC-EP.#TODO solve potential confusion + # raise NotImplementedError, "EP approximation not implemented for uncertain inputs" else: self.likelihood.fit_DTC(self.Kmm, self.psi1) # self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0) - self._set_params(self._get_params()) # update the GP + self._set_params(self._get_params()) # update the GP def _log_likelihood_gradients(self): return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood))) @@ -209,7 +209,7 @@ class sparse_GP(GP): """ The derivative of the bound wrt the inducing inputs Z """ - dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ + dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ if self.has_uncertain_inputs: dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance) dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance) @@ -229,21 +229,56 @@ class sparse_GP(GP): mu = np.dot(Kx.T, self.Cpsi1V) if full_cov: Kxx = self.kern.K(Xnew, which_parts=which_parts) - var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting + var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting else: Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) else: # assert which_parts=='all', "swithching out parts of variational kernels is not implemented" - Kx = self.kern.psi1(self.Z, Xnew, X_variance_new)#, which_parts=which_parts) TODO: which_parts + Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts mu = np.dot(Kx, self.Cpsi1V) if full_cov: raise NotImplementedError, "TODO" else: - Kxx = self.kern.psi0(self.Z,Xnew,X_variance_new) - psi2 = self.kern.psi2(self.Z,Xnew,X_variance_new) - var = Kxx - np.sum(np.sum(psi2*Kmmi_LmiBLmi[None,:,:],1),1) + Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) + psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) + var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) return mu, var[:, None] + def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False): + """ + Predict the function(s) at the new point(s) Xnew. + + Arguments + --------- + :param Xnew: The points at which to make a prediction + :type Xnew: np.ndarray, Nnew x self.Q + :param X_variance_new: The uncertainty in the prediction points + :type X_variance_new: np.ndarray, Nnew x self.Q + :param which_parts: specifies which outputs kernel(s) to use in prediction + :type which_parts: ('all', list of bools) + :param full_cov: whether to return the folll covariance matrix, or just the diagonal + :type full_cov: bool + :rtype: posterior mean, a Numpy array, Nnew x self.D + :rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise + :rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D + + + If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew. + This is to allow for different normalizations of the output dimensions. + + """ + # normalize X values + Xnew = (Xnew.copy() - self._Xmean) / self._Xstd + if X_variance_new is not None: + X_variance_new = X_variance_new / self._Xstd ** 2 + + # here's the actual prediction by the GP model + mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts) + + # now push through likelihood + mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov) + + return mean, var, _025pm, _975pm diff --git a/GPy/util/__init__.py b/GPy/util/__init__.py index 56dbd5b9..27d25518 100644 --- a/GPy/util/__init__.py +++ b/GPy/util/__init__.py @@ -13,3 +13,4 @@ import datasets import mocap import visualize import decorators +import classification diff --git a/GPy/util/classification.py b/GPy/util/classification.py new file mode 100644 index 00000000..07e725d1 --- /dev/null +++ b/GPy/util/classification.py @@ -0,0 +1,30 @@ +import numpy as np + +def conf_matrix(p,labels,names=['1','0'],threshold=.5,show=True): + """ + Returns true and false positives in a binary classification problem + - Column names: true class of the examples + - Row names: classification assigned by the model + + p: probabilities estimated for observation of belonging to class '1' + labels: observations' class + names: classes' names + threshold: probability value at which the model allocate an element to each class + show: whether the matrix should be shown or not + """ + p = p.flatten() + labels = labels.flatten() + N = p.size + C = np.ones(N) + C[p