diff --git a/GPy/core/gp.py b/GPy/core/gp.py
index bbd3939b..75e5d49a 100644
--- a/GPy/core/gp.py
+++ b/GPy/core/gp.py
@@ -6,14 +6,13 @@ import sys
 from .. import kern
 from .model import Model
 from .parameterization import ObsAr
-from .model import Model
 from .mapping import Mapping
-from .parameterization import ObsAr
 from .. import likelihoods
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
 from .parameterization.variational import VariationalPosterior
 import logging
+import warnings
 from GPy.util.normalizer import MeanNorm
 logger = logging.getLogger("GP")
@@ -65,10 +64,14 @@ class GP(Model):
         self.Y = ObsAr(Y)
         self.Y_normalized = self.Y
-        assert Y.shape[0] == self.num_data
+        if Y.shape[0] != self.num_data:
+            #There can be cases where we want more inputs than outputs, for example if we have multiple latent
+            #function values
+            warnings.warn("There are more rows in your input data X, \
+                           than in your output data Y, be VERY sure this is what you want")
         _, self.output_dim = self.Y.shape
-        #TODO: check the type of this is okay?
+        assert ((Y_metadata is None) or isinstance(Y_metadata, dict))
         self.Y_metadata = Y_metadata
         assert isinstance(kernel, kern.Kern)
@@ -296,7 +299,7 @@ class GP(Model):
         :type size: int.
         :param full_cov: whether to return the full covariance matrix, or just the diagonal.
         :type full_cov: bool.
-        :returns: Ysim: set of simulations
+        :returns: fsim: set of simulations
         :rtype: np.ndarray (N x samples)
         """
         m, v = self._raw_predict(X, full_cov=full_cov)
@@ -304,11 +307,11 @@ class GP(Model):
             m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
         v = v.reshape(m.size,-1) if len(v.shape)==3 else v
         if not full_cov:
-            Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
+            fsim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
         else:
-            Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
+            fsim = np.random.multivariate_normal(m.flatten(), v, size).T
-        return Ysim
+        return fsim

     def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
         """
@@ -324,16 +327,16 @@ class GP(Model):
         :type noise_model: integer.
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
-        Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
-        Ysim = self.likelihood.samples(Ysim, Y_metadata)
-
+        fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
+        Ysim = self.likelihood.samples(fsim, Y_metadata)
         return Ysim

     def plot_f(self, plot_limits=None, which_data_rows='all',
             which_data_ycols='all', fixed_inputs=[],
             levels=20, samples=0, fignum=None, ax=None, resolution=None,
             plot_raw=True,
-            linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
+            linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx',
+            apply_link=False):
         """
         Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
         This is a call to plot with plot_raw=True.
@@ -370,6 +373,8 @@ class GP(Model):
         :type Y_metadata: dict
         :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
         :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
+        :param apply_link: if the likelihood has a link function, plot link(f*) rather than f*
+        :type apply_link: boolean
         """
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
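A minimal usage sketch of the sampling API renamed above (not part of the patch; the toy data, the model `m`, and the grid `Xnew` are illustrative assumptions): `posterior_samples_f` draws latent-function samples from p(f*|y), while `posterior_samples` pushes those draws through the likelihood to give samples of p(y*|y).

import numpy as np
import GPy

X = np.random.uniform(-3., 3., (20, 1))
Y = np.sin(X) + np.random.randn(20, 1)*0.05
m = GPy.models.GPRegression(X, Y)   # illustrative model choice
m.optimize()

Xnew = np.linspace(-3., 3., 100)[:, None]
fsim = m.posterior_samples_f(Xnew, size=10)  # draws from p(f*|y), shape (100, 10)
Ysim = m.posterior_samples(Xnew, size=10)    # draws from p(y*|y), likelihood noise included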
         from ..plotting.matplot_dep import models_plots
@@ -382,7 +387,7 @@ class GP(Model):
                 which_data_ycols, fixed_inputs, levels, samples, fignum,
                 ax, resolution, plot_raw=plot_raw, Y_metadata=Y_metadata,
-                data_symbol=data_symbol, **kw)
+                data_symbol=data_symbol, apply_link=apply_link, **kw)

     def plot(self, plot_limits=None, which_data_rows='all',
             which_data_ycols='all', fixed_inputs=[],
diff --git a/GPy/core/model.py b/GPy/core/model.py
index 4108e72c..937d30e5 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -256,7 +256,7 @@ class Model(Parameterized):
         else:
             optimizer = optimization.get_optimizer(optimizer)
             opt = optimizer(start, model=self, max_iters=max_iters, **kwargs)
-
+
         with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook) as vo:
             opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
             vo.finish(opt)
@@ -371,7 +371,12 @@ class Model(Parameterized):
             f1 = self._objective(xx)
             xx[xind] -= 2.*step
             f2 = self._objective(xx)
-            df_ratio = np.abs((f1 - f2) / min(f1, f2))
+            #Avoid dividing by zero: only compute the ratio if either value is above 1e-15 in
+            #magnitude, otherwise both values are essentially the same
+            if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
+                df_ratio = np.abs((f1 - f2) / min(f1, f2))
+            else:
+                df_ratio = 1.0
             df_unstable = df_ratio < df_tolerance
             numerical_gradient = (f1 - f2) / (2 * step)
             if np.all(gradient[xind] == 0):
                 ratio = (f1 - f2) == gradient[xind]
diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
index 2a0a2592..76b10f08 100644
--- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py
+++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py
@@ -64,3 +64,17 @@ class ExactGaussianInference(LatentFunctionInference):
         dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)

         return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
+
+    def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None):
+        """
+        Leave one out error as found in
+        "Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
+        Vehtari et al. 2014.
+        """
+        g = posterior.woodbury_vector
+        c = posterior.woodbury_inv
+        c_diag = np.diag(c)[:, None]
+        neg_log_marginal_LOO = 0.5*np.log(2*np.pi) - 0.5*np.log(c_diag) + 0.5*(g**2)/c_diag
+        #We believe, following "Predictive Approaches for Choosing Hyperparameters in Gaussian
+        #Processes", that this is the negative log marginal LOO
+        return -neg_log_marginal_LOO
diff --git a/GPy/inference/latent_function_inference/laplace.py b/GPy/inference/latent_function_inference/laplace.py
index c6921f57..aefc82ac 100644
--- a/GPy/inference/latent_function_inference/laplace.py
+++ b/GPy/inference/latent_function_inference/laplace.py
@@ -19,6 +19,7 @@ def warning_on_one_line(message, category, filename, lineno, file=None, line=Non
 warnings.formatwarning = warning_on_one_line
 from scipy import optimize
 from . import LatentFunctionInference
+from scipy.integrate import quad

 class Laplace(LatentFunctionInference):
@@ -39,6 +40,85 @@ class Laplace(LatentFunctionInference):
         self.first_run = True
         self._previous_Ki_fhat = None

+    def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None, f_hat=None, W=None, Ki_W_i=None):
+        """
+        Leave one out log predictive density as found in
+        "Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
+        Vehtari et al. 2014.
+        """
+        Ki_f_init = np.zeros_like(Y)
+
+        if K is None:
+            K = kern.K(X)
+
+        if f_hat is None:
+            f_hat, _ = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
+
+        if W is None:
+            W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
+
+        if Ki_W_i is None:
+            _, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
+
+        logpdf_dfhat = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
+
+        if W.shape[1] == 1:
+            W = np.diagflat(W)
+
+        #Eq 14 and 16
+        var_site = 1./np.diag(W)[:, None]
+        mu_site = f_hat + var_site*logpdf_dfhat
+        prec_site = 1./var_site
+        #Eq 19
+        marginal_cov = Ki_W_i
+        marginal_mu = marginal_cov.dot(np.diagflat(prec_site)).dot(mu_site)
+        marginal_var = np.diag(marginal_cov)[:, None]
+        #Eq 30, using the site parameters instead of the Gaussian site parameters
+        #(var_site instead of sigma^{2})
+        posterior_cav_var = 1./(1./marginal_var - 1./var_site)
+        posterior_cav_mean = posterior_cav_var*((1./marginal_var)*marginal_mu - (1./var_site)*Y)
+
+        flat_y = Y.flatten()
+        flat_mu = posterior_cav_mean.flatten()
+        flat_var = posterior_cav_var.flatten()
+
+        if Y_metadata is not None:
+            #Need to zip individual elements of Y_metadata as well
+            Y_metadata_flat = {}
+            for key, val in Y_metadata.items():
+                Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1, 1)
+
+            zipped_values = []
+
+            for i in range(Y.shape[0]):
+                y_m = {}
+                for key, val in Y_metadata_flat.items():
+                    if np.isscalar(val) or val.shape[0] == 1:
+                        y_m[key] = val
+                    else:
+                        #Won't broadcast yet
+                        y_m[key] = val[i]
+                zipped_values.append((flat_y[i], flat_mu[i], flat_var[i], y_m))
+        else:
+            #Otherwise just pass along None's
+            zipped_values = zip(flat_y, flat_mu, flat_var, [None]*Y.shape[0])
+
+        def integral_generator(yi, mi, vi, yi_m):
+            def f(fi_star):
+                #More stable in the log space
+                p_fi = np.exp(likelihood.logpdf(fi_star, yi, yi_m)
+                              - 0.5*np.log(2*np.pi*vi)
+                              - 0.5*np.square(mi-fi_star)/vi)
+                return p_fi
+            return f
+
+        #Eq 30
+        p_ystar, _ = zip(*[quad(integral_generator(y, m, v, yi_m), -np.inf, np.inf)
+                           for y, m, v, yi_m in zipped_values])
+        p_ystar = np.array(p_ystar).reshape(-1, 1)
+        return np.log(p_ystar)
+
     def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
         """
         Returns a Posterior class containing essential quantities of the posterior
diff --git a/GPy/kern/_src/independent_outputs.py b/GPy/kern/_src/independent_outputs.py
index aa9dca80..6f8b7be1 100644
--- a/GPy/kern/_src/independent_outputs.py
+++ b/GPy/kern/_src/independent_outputs.py
@@ -8,7 +8,7 @@ import itertools

 def index_to_slices(index):
     """
-    take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index. 
+    take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.

     e.g.
     >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
@@ -79,10 +79,10 @@ class IndependentOutputs(CombinationKernel):
     def update_gradients_full(self,dL_dK,X,X2=None):
         slices = index_to_slices(X[:,self.index_dim])
-        if self.single_kern: 
+        if self.single_kern:
             target = np.zeros(self.kern.size)
             kerns = itertools.repeat(self.kern)
-        else: 
+        else:
             kerns = self.kern
             target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
         def collate_grads(kern, i, dL, X, X2):
@@ -94,7 +94,7 @@ class IndependentOutputs(CombinationKernel):
         else:
             slices2 = index_to_slices(X2[:,self.index_dim])
             [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
-        if self.single_kern: 
+        if self.single_kern:
             self.kern.gradient = target
         else:
             [kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
@@ -104,12 +104,14 @@ class IndependentOutputs(CombinationKernel):
         kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
         if X2 is None:
             # TODO: make use of index_to_slices
+            # FIXME: Broken as X is already sliced out
+            print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
             values = np.unique(X[:,self.index_dim])
             slices = [X[:,self.index_dim]==i for i in values]
             [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) for kern, s in zip(kerns, slices)]
             #slices = index_to_slices(X[:,self.index_dim])
-            #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s]) 
+            #[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
             # for s in slices_i] for kern, slices_i in zip(kerns, slices)]
             #import ipdb;ipdb.set_trace()
             #[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
diff --git a/GPy/likelihoods/gaussian.py b/GPy/likelihoods/gaussian.py
index 9ecf7dbf..9abb8cde 100644
--- a/GPy/likelihoods/gaussian.py
+++ b/GPy/likelihoods/gaussian.py
@@ -132,10 +132,8 @@ class Gaussian(Likelihood):
         :returns: log likelihood evaluated for this point
         :rtype: float
         """
-        N = y.shape[0]
         ln_det_cov = np.log(self.variance)
-
-        return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi))
+        return -(1.0/(2*self.variance))*((y-link_f)**2) - 0.5*ln_det_cov - 0.5*np.log(2.*np.pi)

     def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
         """
@@ -220,7 +218,6 @@ class Gaussian(Likelihood):
         """
         e = y - link_f
         s_4 = 1.0/(self.variance**2)
-        N = y.shape[0]
         dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e)
         return dlik_dsigma
diff --git a/GPy/likelihoods/likelihood.py b/GPy/likelihoods/likelihood.py
index c876100d..5388526e 100644
--- a/GPy/likelihoods/likelihood.py
+++ b/GPy/likelihoods/likelihood.py
@@ -114,21 +114,24 @@ class Likelihood(Parameterized):
             #Otherwise just pass along None's
             zipped_values = zip(flat_y_test, flat_mu_star, flat_var_star, [None]*y_test.shape[0])

-        def integral_generator(y, m, v, y_m):
-            """Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
-            def f(f_star):
+        def integral_generator(yi, mi, vi, yi_m):
+            """Generate a function which can be integrated
+            to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
+            def f(fi_star):
                 #exponent = np.exp(-(1./(2*v))*np.square(m-f_star))
                 #from GPy.util.misc import safe_exp
                 #exponent = safe_exp(exponent)
                 #return self.pdf(f_star, y, y_m)*exponent

                 #More stable in the log space
-                return np.exp(self.logpdf(f_star, y, y_m) -(1./(2*v))*np.square(m-f_star))
+                return np.exp(self.logpdf(fi_star, yi, yi_m)
+                              - 0.5*np.log(2*np.pi*vi)
+                              - 0.5*np.square(mi-fi_star)/vi)
             return f

-        scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v, y_m), -np.inf, np.inf) for y, m, v, y_m in zipped_values])
-        scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
-        p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
+        p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
+                           for yi, mi, vi, yi_m in zipped_values])
+        p_ystar = np.array(p_ystar).reshape(-1, 1)
         return np.log(p_ystar)

     def _moments_match_ep(self,obs,tau,v):
@@ -545,9 +548,9 @@ class Likelihood(Parameterized):
         #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'

         # ensure we have gradients for every parameter we want to optimize
-        assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array
-        assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param
-        assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param
+        assert dlogpdf_dtheta.shape[0] == self.size #num_param x f, d array
+        assert dlogpdf_df_dtheta.shape[0] == self.size #num_param x f x d matrix or just num_param x f
+        assert d2logpdf_df2_dtheta.shape[0] == self.size #num_param x f matrix or num_param x f x d matrix, num_param x f x f or num_param x f x f x d

         return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
diff --git a/GPy/likelihoods/link_functions.py b/GPy/likelihoods/link_functions.py
index 41bff1f8..3d753395 100644
--- a/GPy/likelihoods/link_functions.py
+++ b/GPy/likelihoods/link_functions.py
@@ -4,9 +4,7 @@ import numpy as np
 from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
 import scipy as sp
-
-_exp_lim_val = np.finfo(np.float64).max
-_lim_val = np.log(_exp_lim_val)
+from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times

 class GPTransformation(object):
     """
@@ -63,12 +61,13 @@ class Identity(GPTransformation):
     def d3transf_df3(self,f):
         return np.zeros_like(f)

+
 class Probit(GPTransformation):
     """
     .. math::
         g(f) = \\Phi^{-1} (mu)
-
+
     """
     def transf(self,f):
         return std_norm_cdf(f)
@@ -80,7 +79,7 @@ class Probit(GPTransformation):
         return -f * std_norm_pdf(f)

     def d3transf_df3(self,f):
-        return (np.square(f)-1.)*std_norm_pdf(f)
+        return (safe_square(f)-1.)*std_norm_pdf(f)

 class Cloglog(GPTransformation):
     """
@@ -96,19 +95,23 @@ class Cloglog(GPTransformation):
     """
     def transf(self,f):
-        return 1-np.exp(-np.exp(f))
+        ef = safe_exp(f)
+        return 1-np.exp(-ef)

     def dtransf_df(self,f):
-        return np.exp(f-np.exp(f))
+        ef = safe_exp(f)
+        return np.exp(f-ef)

     def d2transf_df2(self,f):
-        ef = np.exp(f)
+        ef = safe_exp(f)
         return -np.exp(f-ef)*(ef-1.)
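The safe_* helpers that these rewritten link functions rely on all follow the same pattern: clip the input just below the point where the following operation would overflow float64. A minimal sketch of the idea, assuming only numpy (the actual definitions appear in GPy/util/misc.py further down this patch):

import numpy as np

_lim_val = np.finfo(np.float64).max  # largest representable float64
_lim_val_exp = np.log(_lim_val)      # np.exp overflows above this (~709.78)

def safe_exp(f):
    # clip first, so np.exp can never overflow to inf
    return np.exp(np.clip(f, -np.inf, _lim_val_exp))

print(np.exp(1000.0))    # inf, with a RuntimeWarning
print(safe_exp(1000.0))  # ~1.8e308, finite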
     def d3transf_df3(self,f):
-        ef = np.exp(f)
-        return np.exp(f-ef)*(1.-3*ef + ef**2)
-
+        ef = safe_exp(f)
+        ef2 = safe_square(ef)
+        three_times_ef = safe_three_times(ef)
+        r_val = np.exp(f-ef)*(1.-three_times_ef + ef2)
+        return r_val

 class Log(GPTransformation):
     """
@@ -118,16 +121,16 @@ class Log(GPTransformation):
     """
     def transf(self,f):
-        return np.exp(np.clip(f, -np.inf, _lim_val))
+        return safe_exp(f)

     def dtransf_df(self,f):
-        return np.exp(np.clip(f, -np.inf, _lim_val))
+        return safe_exp(f)

     def d2transf_df2(self,f):
-        return np.exp(np.clip(f, -np.inf, _lim_val))
+        return safe_exp(f)

     def d3transf_df3(self,f):
-        return np.exp(np.clip(f, -np.inf, _lim_val))
+        return safe_exp(f)

 class Log_ex_1(GPTransformation):
     """
@@ -137,17 +140,20 @@ class Log_ex_1(GPTransformation):
     """
     def transf(self,f):
-        return np.log(1.+np.exp(f))
+        return np.log1p(safe_exp(f))

     def dtransf_df(self,f):
-        return np.exp(f)/(1.+np.exp(f))
+        ef = safe_exp(f)
+        return ef/(1.+ef)

     def d2transf_df2(self,f):
-        aux = np.exp(f)/(1.+np.exp(f))
+        ef = safe_exp(f)
+        aux = ef/(1.+ef)
         return aux*(1.-aux)

     def d3transf_df3(self,f):
-        aux = np.exp(f)/(1.+np.exp(f))
+        ef = safe_exp(f)
+        aux = ef/(1.+ef)
         daux_df = aux*(1.-aux)
         return daux_df - (2.*aux*daux_df)
@@ -155,14 +161,17 @@ class Reciprocal(GPTransformation):
     def transf(self,f):
         return 1./f

-    def dtransf_df(self,f):
-        return -1./(f**2)
+    def dtransf_df(self, f):
+        f2 = safe_square(f)
+        return -1./f2

-    def d2transf_df2(self,f):
-        return 2./(f**3)
+    def d2transf_df2(self, f):
+        f3 = safe_cube(f)
+        return 2./f3

     def d3transf_df3(self,f):
-        return -6./(f**4)
+        f4 = safe_quad(f)
+        return -6./f4

 class Heaviside(GPTransformation):
     """
diff --git a/GPy/models/__init__.py b/GPy/models/__init__.py
index 8f8fd838..0d18eb47 100644
--- a/GPy/models/__init__.py
+++ b/GPy/models/__init__.py
@@ -11,7 +11,7 @@ from .sparse_gplvm import SparseGPLVM
 from .warped_gp import WarpedGP
 from .bayesian_gplvm import BayesianGPLVM
 from .mrd import MRD
-from .gradient_checker import GradientChecker
+from .gradient_checker import GradientChecker, HessianChecker, SkewChecker
 from .ss_gplvm import SSGPLVM
 from .gp_coregionalized_regression import GPCoregionalizedRegression
 from .sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py
index 7cbd69eb..3ac703fe 100644
--- a/GPy/models/bayesian_gplvm.py
+++ b/GPy/models/bayesian_gplvm.py
@@ -24,7 +24,7 @@ class BayesianGPLVM(SparseGP_MPI):
     def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
                  Z=None, kernel=None, inference_method=None, likelihood=None,
                  name='bayesian gplvm', mpi_comm=None, normalizer=None,
-                 missing_data=False, stochastic=False, batchsize=1):
+                 missing_data=False, stochastic=False, batchsize=1, Y_metadata=None):
         self.logger = logging.getLogger(self.__class__.__name__)
         if X is None:
@@ -69,6 +69,7 @@ class BayesianGPLVM(SparseGP_MPI):
                                             name=name, inference_method=inference_method,
                                             normalizer=normalizer,
                                             mpi_comm=mpi_comm, variational_prior=self.variational_prior,
+                                            Y_metadata=Y_metadata
                                             )
         self.link_parameter(self.X, index=0)
@@ -83,7 +84,7 @@ class BayesianGPLVM(SparseGP_MPI):
     def parameters_changed(self):
         super(BayesianGPLVM,self).parameters_changed()
         if isinstance(self.inference_method, VarDTC_minibatch):
-            return 
+            return

         kl_fctr = 1.
         self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
diff --git a/GPy/models/gradient_checker.py b/GPy/models/gradient_checker.py
index 74026f8e..c2cde834 100644
--- a/GPy/models/gradient_checker.py
+++ b/GPy/models/gradient_checker.py
@@ -5,6 +5,8 @@
 from ..core.model import Model
 import itertools
 import numpy
 from ..core.parameterization import Param
+np = numpy
+from ..util.block_matrices import get_blocks, get_block_shapes, unblock, get_blocks_3d, get_block_shapes_3d

 def get_shape(x):
     if isinstance(x, numpy.ndarray):
@@ -111,3 +113,261 @@ class GradientChecker(Model):
         #for name, shape in zip(self.names, self.shapes):
             #_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
         #return _param_names
+
+
+class HessianChecker(GradientChecker):
+
+    def __init__(self, f, df, ddf, x0, names=None, *args, **kwargs):
+        """
+        :param f: Function (only used for the numerical hessian gradient)
+        :param df: Gradient of the function (used to compute the numerical hessian)
+        :param ddf: Analytical hessian function to check
+        :param x0:
+            Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
+            Can be a list of arrays, if takes a list of arrays. This list will be passed
+            to f and df in the same order as given here.
+            If only one argument, make sure not to pass a list!!!
+
+        :type x0: [array-like] | array-like | float | int
+        :param names:
+            Names to print, when performing gradcheck. If a list was passed to x0
+            a list of names with the same length is expected.
+        :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
+
+        """
+        super(HessianChecker, self).__init__(df, ddf, x0, names=names, *args, **kwargs)
+        self._f = f
+        self._df = df
+        self._ddf = ddf
+
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
+        """
+        Overwrite checkgrad method to check the whole block instead of looping through
+
+        Shows diagnostics using matshow instead
+
+        :param verbose: If True, print a "full" checking of each parameter
+        :type verbose: bool
+        :param step: The size of the step around which to linearise the objective
+        :type step: float (default 1e-6)
+        :param tolerance: the tolerance allowed (see note)
+        :type tolerance: float (default 1e-3)
+
+        Note:-
+           The gradient is considered correct if the ratio of the analytical
+           and numerical gradients is within tolerance of unity.
+        """
+        try:
+            import numdifftools as nd
+        except:
+            raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
+
+        if target_param:
+            raise NotImplementedError('Only basic functionality is provided with this gradchecker')
+
+        #Repeat for each parameter; not the nicest, but there shouldn't be many cases
+        #where there are many variables
+        current_index = 0
+        for name, shape in zip(self.names, self.shapes):
+            current_size = numpy.prod(shape)
+            x = self.optimizer_array.copy()
+            #x = self._get_params_transformed().copy()
+            x = x[current_index:current_index + current_size].reshape(shape)
+
+            # Check gradients
+            analytic_hess = self._ddf(x)
+            if analytic_hess.shape[1] == 1:
+                analytic_hess = numpy.diagflat(analytic_hess)
+
+            #From the docs:
+            #x0 : vector location
+            #at which to differentiate fun
+            #If x0 is an N x M array, then fun is assumed to be a function
+            #of N*M variables., thus we must have it flat, not (N,1), but just (N,)
+            #numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
+            numeric_hess_partial = nd.Jacobian(self._df, vectorized=False)
+            #numeric_hess_partial = nd.Derivative(self._df, vectorized=True)
+            numeric_hess = numeric_hess_partial(x)
+
+            check_passed = self.checkgrad_block(analytic_hess, numeric_hess, verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=plot)
+            current_index += current_size
+        return check_passed
+
+    def checkgrad_block(self, analytic_hess, numeric_hess, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
+        """
+        Checkgrad a block matrix
+        """
+        if analytic_hess.dtype is np.dtype('object'):
+            #Make the numeric hessian also into a block matrix
+            real_size = get_block_shapes(analytic_hess)
+            num_elements = np.sum(real_size)
+            if (num_elements, num_elements) == numeric_hess.shape:
+                #If the sizes match we assume the numeric hessian is the whole hessian
+                #(we have not fixed any values)
+                numeric_hess = get_blocks(numeric_hess, real_size)
+            else:
+                #Make a fake empty matrix and fill out the correct block
+                tmp_numeric_hess = get_blocks(np.zeros((num_elements, num_elements)), real_size)
+                tmp_numeric_hess[block_indices] = numeric_hess.copy()
+                numeric_hess = tmp_numeric_hess
+
+        if block_indices is not None:
+            #Extract the right block
+            analytic_hess = analytic_hess[block_indices]
+            numeric_hess = numeric_hess[block_indices]
+        else:
+            #Unblock them if they are in blocks and you aren't checking a single block (checking the whole hessian)
+            if analytic_hess.dtype is np.dtype('object'):
+                analytic_hess = unblock(analytic_hess)
+                numeric_hess = unblock(numeric_hess)
+
+        ratio = numeric_hess / (numpy.where(analytic_hess==0, 1e-10, analytic_hess))
+        difference = numpy.abs(analytic_hess - numeric_hess)
+
+        check_passed = numpy.all((numpy.abs(1 - ratio)) < tolerance) or numpy.allclose(numeric_hess, analytic_hess, atol = tolerance)
+
+        if verbose:
+            if block_indices:
+                print "\nBlock {}".format(block_indices)
+            else:
+                print "\nAll blocks"
+
+            header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
+            header_string = map(lambda x: ' | '.join(header), [header])
+            separator = '-' * len(header_string[0])
+            print '\n'.join([header_string[0], separator])
+            min_r = '%.6f' % float(numpy.min(ratio))
+            max_r = '%.6f' % float(numpy.max(ratio))
+            max_d = '%.6f' % float(numpy.max(difference))
+            min_d = '%.6f' % float(numpy.min(difference))
+            cols = [max_r, min_r, min_d, max_d]
+
+            if check_passed:
+                checked = "\033[92m True \033[0m"
+            else:
+                checked = "\033[91m False \033[0m"
+
+            grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
+            print grad_string
+
+        if plot:
+            import pylab as pb
+            fig, axes = pb.subplots(2, 2)
+            max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
+            min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))
+            msa = axes[0,0].matshow(analytic_hess, vmin=min_lim, vmax=max_lim)
+            axes[0,0].set_title('Analytic hessian')
+            axes[0,0].xaxis.set_ticklabels([None])
+            axes[0,0].yaxis.set_ticklabels([None])
+            axes[0,0].xaxis.set_ticks([None])
+            axes[0,0].yaxis.set_ticks([None])
+            msn = axes[0,1].matshow(numeric_hess, vmin=min_lim, vmax=max_lim)
+            pb.colorbar(msn, ax=axes[0,1])
+            axes[0,1].set_title('Numeric hessian')
+            axes[0,1].xaxis.set_ticklabels([None])
+            axes[0,1].yaxis.set_ticklabels([None])
+            axes[0,1].xaxis.set_ticks([None])
+            axes[0,1].yaxis.set_ticks([None])
+            msr = axes[1,0].matshow(ratio)
+            pb.colorbar(msr, ax=axes[1,0])
+            axes[1,0].set_title('Ratio')
+            axes[1,0].xaxis.set_ticklabels([None])
+            axes[1,0].yaxis.set_ticklabels([None])
+            axes[1,0].xaxis.set_ticks([None])
+            axes[1,0].yaxis.set_ticks([None])
+            msd = axes[1,1].matshow(difference)
+            pb.colorbar(msd, ax=axes[1,1])
+            axes[1,1].set_title('difference')
+            axes[1,1].xaxis.set_ticklabels([None])
+            axes[1,1].yaxis.set_ticklabels([None])
+            axes[1,1].xaxis.set_ticks([None])
+            axes[1,1].yaxis.set_ticks([None])
+            if block_indices:
+                fig.suptitle("Block: {}".format(block_indices))
+            pb.show()
+
+        return check_passed
+
+class SkewChecker(HessianChecker):
+
+    def __init__(self, df, ddf, dddf, x0, names=None, *args, **kwargs):
+        """
+        :param df: Gradient of the function
+        :param ddf: Hessian of the function (used to compute the numerical third derivative)
+        :param dddf: Analytical third derivative function to check
+        :param x0:
+            Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
+            Can be a list of arrays, if takes a list of arrays. This list will be passed
+            to f and df in the same order as given here.
+            If only one argument, make sure not to pass a list!!!
+
+        :type x0: [array-like] | array-like | float | int
+        :param names:
+            Names to print, when performing gradcheck. If a list was passed to x0
+            a list of names with the same length is expected.
+        :param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
+
+        """
+        super(SkewChecker, self).__init__(df, ddf, dddf, x0, names=names, *args, **kwargs)
+
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False, super_plot=False):
+        """
+        Gradient checker that just checks each hessian individually
+
+        super_plot will plot the hessian wrt every parameter, plot will just do the first one
+        """
+        try:
+            import numdifftools as nd
+        except:
+            raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
+
+        if target_param:
+            raise NotImplementedError('Only basic functionality is provided with this gradchecker')
+
+        #Repeat for each parameter; not the nicest, but there shouldn't be many cases
+        #where there are many variables
+        current_index = 0
+        for name, n_shape in zip(self.names, self.shapes):
+            current_size = numpy.prod(n_shape)
+            x = self.optimizer_array.copy()
+            #x = self._get_params_transformed().copy()
+            x = x[current_index:current_index + current_size].reshape(n_shape)
+
+            # Check gradients
+            #Actually the third derivative
+            analytic_hess = self._ddf(x)
+
+            #Can only calculate the jacobian for one variable at a time
+            #From the docs:
+            #x0 : vector location
+            #at which to differentiate fun
+            #If x0 is an N x M array, then fun is assumed to be a function
+            #of N*M variables., thus we must have it flat, not (N,1), but just (N,)
+            #numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
+            #Actually _df is already the hessian
+            numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
+            numeric_hess = numeric_hess_partial(x)
+
+            print "Done making numerical hessian"
+            if analytic_hess.dtype is np.dtype('object'):
+                #Blockify numeric_hess as well
+                blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
+                #HACK
+                real_block_size = np.sum(blocksizes)
+                numeric_hess = numeric_hess.reshape(real_block_size, real_block_size, pagesizes)
+                #numeric_hess = get_blocks_3d(numeric_hess, blocksizes)#, pagesizes)
+            else:
+                numeric_hess = numeric_hess.reshape(*analytic_hess.shape)
+
+            #Check every block individually (for ease)
+            check_passed = [False]*numeric_hess.shape[2]
+            for block_ind in xrange(numeric_hess.shape[2]):
+                #Unless super_plot is set, just plot the first one
+                p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
+                if verbose:
+                    print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
+                check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
+
+            current_index += current_size
+        return np.all(check_passed)
+
diff --git a/GPy/plotting/matplot_dep/models_plots.py b/GPy/plotting/matplot_dep/models_plots.py
index 5cdf69fc..0cda12f1 100644
--- a/GPy/plotting/matplot_dep/models_plots.py
+++ b/GPy/plotting/matplot_dep/models_plots.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 try:
@@ -16,7 +16,8 @@
 def plot_fit(model, plot_limits=None, which_data_rows='all',
         which_data_ycols='all', fixed_inputs=[],
         levels=20, samples=0, fignum=None, ax=None, resolution=None, plot_raw=False,
-        linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx'):
+        linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
+        apply_link=False, samples_f=0, plot_uncertain_inputs=True):
     """
     Plot the posterior of the GP.
       - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@@ -38,7 +39,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     :type resolution: int
     :param levels: number of levels to plot in a contour plot.
     :type levels: int
-    :param samples: the number of a posteriori samples to plot
+    :param samples: the number of posterior samples to plot, drawn from p(y*|y)
     :type samples: int
     :param fignum: figure to plot on.
     :type fignum: figure number
@@ -49,6 +50,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     :type linecol:
     :param fillcol: color of fill
     :param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
+    :param apply_link: apply the link function if plotting f (default false)
+    :type apply_link: boolean
+    :param samples_f: the number of posterior samples of f to plot, drawn from p(f*|y)
+    :type samples_f: int
     """
     #deal with optional arguments
     if which_data_rows == 'all':
@@ -88,8 +93,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
     #make a prediction on the frame and plot it
     if plot_raw:
         m, v = model._raw_predict(Xgrid)
-        lower = m - 2*np.sqrt(v)
-        upper = m + 2*np.sqrt(v)
+        if apply_link:
+            lower = model.likelihood.gp_link.transf(m - 2*np.sqrt(v))
+            upper = model.likelihood.gp_link.transf(m + 2*np.sqrt(v))
+            #Once transformed this is now the median of the function
+            m = model.likelihood.gp_link.transf(m)
+        else:
+            lower = m - 2*np.sqrt(v)
+            upper = m + 2*np.sqrt(v)
     else:
         if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
             meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
@@ -110,13 +121,31 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
             plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
             #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
+    if samples_f: #NOTE not tested with fixed_inputs
+        Fsim = model.posterior_samples_f(Xgrid, samples_f)
+        for fi in Fsim.T:
+            plots['posterior_samples_f'] = ax.plot(Xnew, fi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
+            #ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
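A usage sketch of the two new plotting keywords above (illustrative only; `m` is assumed to be a fitted 1-D model with a non-Gaussian likelihood, e.g. a GPy.models.GPClassification instance):

from GPy.plotting.matplot_dep.models_plots import plot_fit

plot_fit(m, plot_raw=True, apply_link=True)  # latent f* pushed through the link function
plot_fit(m, samples_f=5)                     # overlay 5 samples drawn from p(f*|y)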
+
     #add error bars for uncertain (if input uncertainty is being modelled)
-    if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
-        plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
-                    xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
-                    ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
-
+    if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs() and plot_uncertain_inputs:
+        if plot_raw:
+            #add error bars for uncertain inputs (if input uncertainty is being modelled), for plot_f
+            #Hack to plot error bars on the latent function, rather than on the data
+            vs = model.X.mean.values.copy()
+            for i,v in fixed_inputs:
+                vs[:,i] = v
+            m_X, _ = model._raw_predict(vs)
+            if apply_link:
+                m_X = model.likelihood.gp_link.transf(m_X)
+            plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), m_X[which_data_rows, which_data_ycols].flatten(),
+                        xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
+                        ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
+        else:
+            plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
+                        xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
+                        ecolor='k', fmt=None, elinewidth=.5, alpha=.5)

     #set the limits of the plot to some sensible values
     ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
@@ -186,3 +215,29 @@ def plot_fit_f(model, *args, **kwargs):
     """
     kwargs['plot_raw'] = True
     plot_fit(model,*args, **kwargs)
+
+def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
+    """
+    Convenience function for returning fixed_inputs where the other inputs
+    are fixed using fix_routine.
+    :param model: model
+    :type model: Model
+    :param non_fixed_inputs: dimensions of non fixed inputs
+    :type non_fixed_inputs: list
+    :param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
+    :type fix_routine: string
+    """
+    f_inputs = []
+    if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
+        X = model.X.mean.values.copy()
+    else:
+        X = model.X.values.copy()
+    for i in range(X.shape[1]):
+        if i not in non_fixed_inputs:
+            if fix_routine == 'mean':
+                f_inputs.append( (i, np.mean(X[:,i])) )
+            elif fix_routine == 'median':
+                f_inputs.append( (i, np.median(X[:,i])) )
+            elif fix_routine == 'zero':
+                f_inputs.append( (i, 0) )
+    return f_inputs
diff --git a/GPy/testing/link_function_tests.py b/GPy/testing/link_function_tests.py
new file mode 100644
index 00000000..fb8fba99
--- /dev/null
+++ b/GPy/testing/link_function_tests.py
@@ -0,0 +1,143 @@
+import numpy as np
+import scipy as sp
+from scipy.special import cbrt
+from GPy.models import GradientChecker
+_lim_val = np.finfo(np.float64).max
+_lim_val_exp = np.log(_lim_val)
+_lim_val_square = np.sqrt(_lim_val)
+_lim_val_cube = cbrt(_lim_val)
+from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside
+
+class LinkFunctionTests(np.testing.TestCase):
+    def setUp(self):
+        self.small_f = np.array([[-1e-4]])
+        self.zero_f = np.array([[1e-4]])
+        self.mid_f = np.array([[5.0]])
+        self.large_f = np.array([[1e4]])
+        self.f_lower_lim = np.array(-np.inf)
+        self.f_upper_lim = np.array(np.inf)
+
+    def check_gradient(self, link_func, lim_of_inf, test_lim=False):
+        grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.mid_f)
+        self.assertTrue(grad.checkgrad(verbose=True))
+        grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.mid_f)
+        self.assertTrue(grad2.checkgrad(verbose=True))
+        grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.mid_f)
+        self.assertTrue(grad3.checkgrad(verbose=True))
+
+        grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.small_f)
+        self.assertTrue(grad.checkgrad(verbose=True))
+        grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.small_f)
+        self.assertTrue(grad2.checkgrad(verbose=True))
+        grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.small_f)
+        self.assertTrue(grad3.checkgrad(verbose=True))
+
+        grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.zero_f)
+        self.assertTrue(grad.checkgrad(verbose=True))
+        grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.zero_f)
+        self.assertTrue(grad2.checkgrad(verbose=True))
+        grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.zero_f)
+        self.assertTrue(grad3.checkgrad(verbose=True))
+
+        #Clip the large f value so it stays just below the limit of infinity
+        large_f = np.clip(self.large_f, -np.inf, lim_of_inf-1e-3)
+        grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=large_f)
+        self.assertTrue(grad.checkgrad(verbose=True))
+        grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=large_f)
+        self.assertTrue(grad2.checkgrad(verbose=True))
+        grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=large_f)
+        self.assertTrue(grad3.checkgrad(verbose=True))
+
+        if test_lim:
+            print "Testing limits"
+            #Back off a little, otherwise we are too close to the limit for gradcheck to work effectively
+            lim_of_inf = lim_of_inf - 1e-4
+            grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)
+            self.assertTrue(grad.checkgrad(verbose=True))
+            grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=lim_of_inf)
+            self.assertTrue(grad2.checkgrad(verbose=True))
+            grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=lim_of_inf)
+            self.assertTrue(grad3.checkgrad(verbose=True))
+
+    def check_overflow(self, link_func, lim_of_inf):
+        #Check that it does something sensible beyond this limit,
+        #note this is not checking the value is correct, just that it isn't nan
+        beyond_lim_of_inf = lim_of_inf + 100.0
+        self.assertFalse(np.isinf(link_func.transf(beyond_lim_of_inf)))
+        self.assertFalse(np.isinf(link_func.dtransf_df(beyond_lim_of_inf)))
+        self.assertFalse(np.isinf(link_func.d2transf_df2(beyond_lim_of_inf)))
+
+        self.assertFalse(np.isnan(link_func.transf(beyond_lim_of_inf)))
+        self.assertFalse(np.isnan(link_func.dtransf_df(beyond_lim_of_inf)))
+        self.assertFalse(np.isnan(link_func.d2transf_df2(beyond_lim_of_inf)))
+
+    def test_log_overflow(self):
+        link = Log()
+        lim_of_inf = _lim_val_exp
+
+        np.testing.assert_almost_equal(np.exp(self.mid_f), link.transf(self.mid_f))
+        assert np.isinf(np.exp(np.log(self.f_upper_lim)))
+        #Check the clipping works
+        np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
+        #Need to look at most significant figures here rather than the decimals
+        np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
+        self.check_overflow(link, lim_of_inf)
+
+        #Check that it would otherwise fail
+        beyond_lim_of_inf = lim_of_inf + 10.0
+        old_err_state = np.seterr(over='ignore')
+        self.assertTrue(np.isinf(np.exp(beyond_lim_of_inf)))
+        np.seterr(**old_err_state)
+
+    def test_log_ex_1_overflow(self):
+        link = Log_ex_1()
+        lim_of_inf = _lim_val_exp
+
+        np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
+        assert np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim))))
+        #Check the clipping works
+        np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
+        #Need to look at most significant figures here rather than the decimals
+        np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5)
+        self.check_overflow(link, lim_of_inf)
+
+        #Check that it would otherwise fail
+        beyond_lim_of_inf = lim_of_inf + 10.0
+        old_err_state = np.seterr(over='ignore')
+        self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf))))
+        np.seterr(**old_err_state)
+
+
+    def test_log_gradients(self):
+        # transf dtransf_df d2transf_df2 d3transf_df3
+        link = Log()
+        lim_of_inf = _lim_val_exp
+        self.check_gradient(link, lim_of_inf, test_lim=True)
+
+    def test_identity_gradients(self):
+        link = Identity()
+        lim_of_inf = _lim_val
+        #FIXME: Should be able to think of a way to test the limits of this
+        self.check_gradient(link, lim_of_inf, test_lim=False)
+
+    def test_probit_gradients(self):
+        link = Probit()
+        lim_of_inf = _lim_val
+        self.check_gradient(link, lim_of_inf, test_lim=True)
+
+    def test_Cloglog_gradients(self):
+        link = Cloglog()
+        lim_of_inf = _lim_val_exp
+        self.check_gradient(link, lim_of_inf, test_lim=True)
+
+    def test_Log_ex_1_gradients(self):
+        link = Log_ex_1()
+        lim_of_inf = _lim_val_exp
+        self.check_gradient(link, lim_of_inf, test_lim=True)
+        self.check_overflow(link, lim_of_inf)
+
+    def test_reciprocal_gradients(self):
+        link = Reciprocal()
+        lim_of_inf = _lim_val
+        #Does not work with much smaller values, and values closer to zero than 1e-5
+        self.check_gradient(link, lim_of_inf, test_lim=True)
diff --git a/GPy/util/block_matrices.py b/GPy/util/block_matrices.py
index a047abc6..e1e04aaa 100644
--- a/GPy/util/block_matrices.py
+++ b/GPy/util/block_matrices.py
@@ -1,9 +1,37 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2014-2015, Alan Saul
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np

+def get_blocks_3d(A, blocksizes, pagesizes=None):
+    """
+    Given a 3d matrix, make a block matrix, where the first and second dimensions are blocked according
+    to blocksizes, and the pages are blocked using pagesizes
+    """
+    assert (A.shape[0]==A.shape[1]) and len(A.shape)==3, "can't blockify this non-square matrix, may need to use 2d version"
+    N = np.sum(blocksizes)
+    assert A.shape[0] == N, "bad blocksizes"
+    num_blocks = len(blocksizes)
+    if pagesizes is None:
+        #Assume each page of A should be its own dimension
+        pagesizes = range(A.shape[2])#[0]*A.shape[2]
+    num_pages = len(pagesizes)
+    B = np.empty(shape=(num_blocks, num_blocks, num_pages), dtype=np.object)
+    count_k = 0
+    #for Bk, k in enumerate(pagesizes):
+    for Bk in pagesizes:
+        count_i = 0
+        for Bi, i in enumerate(blocksizes):
+            count_j = 0
+            for Bj, j in enumerate(blocksizes):
+                #We would like count_k:count_k + k here, but it's awkward as it makes an NxNx1 array if page sizes are set to 1
+                B[Bi, Bj, Bk] = A[count_i:count_i + i, count_j:count_j + j, Bk]
+                count_j += j
+            count_i += i
+        #count_k += k
+    return B
+
 def get_blocks(A, blocksizes):
-    assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can;t blockify this non-square matrix"
+    assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can't blockify this non-square matrix"
     N = np.sum(blocksizes)
     assert A.shape[0] == N, "bad blocksizes"
     num_blocks = len(blocksizes)
@@ -17,6 +45,11 @@ def get_blocks(A, blocksizes):
         count_i += i
     return B

+def get_block_shapes_3d(B):
+    assert B.dtype is np.dtype('object'), "Must be a block matrix"
+    #FIXME: This isn't general AT ALL...
+    return get_block_shapes(B[:,:,0]), B.shape[2]
+
 def get_block_shapes(B):
     assert B.dtype is np.dtype('object'), "Must be a block matrix"
     return [B[b,b].shape[0] for b in range(0, B.shape[0])]
@@ -35,7 +68,7 @@ def unblock(B):
         count_i += i
     return A

-def block_dot(A, B):
+def block_dot(A, B, diagonal=False):
     """
     Element wise dot product on block matricies

     +-------------+      +------+------+      +-------+-------+

     ..Note
+        If any block of either (A or B) is stored as a 1d vector then we assume
+        that it denotes a diagonal matrix, and an efficient dot product using numpy
+        broadcasting will be used, i.e. A11*B11
+
         If either (A or B) of the diagonal matrices are stored as vectors then a more
         efficient dot product using numpy broadcasting will be used, i.e. A11*B11
     """
     #Must have same number of blocks and be a block matrix
     assert A.dtype is np.dtype('object'), "Must be a block matrix"
     assert B.dtype is np.dtype('object'), "Must be a block matrix"
-    Ashape = A.shape
-    Bshape = B.shape
-    assert Ashape == Bshape
-    def f(A,B):
-        if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]:
-            #FIXME: Careful if one is transpose of other, would make a matrix
-            return A*B
+    assert A.shape == B.shape
+    def f(C,D):
+        """
+        C is an element of A, D is the associated element of B
+        """
+        Cshape = C.shape
+        Dshape = D.shape
+        if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
+           or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
+            print "Broadcasting, C: {} D:{}".format(C.shape, D.shape)
+            return C*D
         else:
-            return np.dot(A,B)
+            print "Dotting, C: {} D:{}".format(C.shape, D.shape)
+            return np.dot(C,D)
     dot = np.vectorize(f, otypes = [np.object])
     return dot(A,B)
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index 8ac5418f..26c4b774 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -20,7 +20,7 @@ try:
     from scipy import weave
 except ImportError:
     config.set('weave', 'working', 'False')
-
+
 _scipyversion = np.float64((scipy.__version__).split('.')[:2])
 _fix_dpotri_scipy_bug = True
@@ -102,7 +102,6 @@ def jitchol(A, maxtries=5):
         num_tries = 1
         while num_tries <= maxtries and np.isfinite(jitter):
             try:
-                print(jitter)
                 L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
                 return L
             except:
@@ -115,7 +114,6 @@ def jitchol(A, maxtries=5):
     except:
         logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
             '  in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
-        import ipdb;ipdb.set_trace()
     return L

 # def dtrtri(L, lower=1):
diff --git a/GPy/util/misc.py b/GPy/util/misc.py
index 431d6f8f..ddc29fa0 100644
--- a/GPy/util/misc.py
+++ b/GPy/util/misc.py
@@ -2,18 +2,37 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
+from scipy.special import cbrt
 from .config import *

 _lim_val = np.finfo(np.float64).max
-
 _lim_val_exp = np.log(_lim_val)
 _lim_val_square = np.sqrt(_lim_val)
-_lim_val_cube = np.power(_lim_val, -3)
+#_lim_val_cube = cbrt(_lim_val)
+_lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf)
+_lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf)
+_lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf)

 def safe_exp(f):
     clip_f = np.clip(f, -np.inf, _lim_val_exp)
     return np.exp(clip_f)

+def safe_square(f):
+    f = np.clip(f, -np.inf, _lim_val_square)
+    return f**2
+
+def safe_cube(f):
+    f = np.clip(f, -np.inf, _lim_val_cube)
+    return f**3
+
+def safe_quad(f):
+    f = np.clip(f, -np.inf, _lim_val_quad)
+    return f**4
+
+def safe_three_times(f):
+    f = np.clip(f, -np.inf, _lim_val_three_times)
+    return 3*f
+
 def chain_1(df_dg, dg_dx):
     """
     Generic chaining function for first derivative
@@ -39,8 +58,8 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
         return d2f_dg2
     if len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1:
         raise NotImplementedError('Not implemented for matricies yet')
-    #dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2
-    dg_dx_2 = dg_dx**2
+    dg_dx_2 = np.clip(dg_dx, -np.inf, _lim_val_square)**2
+    #dg_dx_2 = dg_dx**2
     return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2

 def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
@@ -55,8 +74,8 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
     if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
          or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
         raise NotImplementedError('Not implemented for matricies yet')
-    #dg_dx_3 = np.clip(dg_dx, 1e-12, _lim_val_cube)**3
-    dg_dx_3 = dg_dx**3
+    dg_dx_3 = np.clip(dg_dx, -np.inf, _lim_val_cube)**3
+    #dg_dx_3 = dg_dx**3
     return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3

 def opt_wrapper(m, **kwargs):
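A closing sanity check on the chain_* helpers edited above: chain_2 computes d2f(g(x))/dx2 = f''(g)*g'**2 + f'(g)*g'', now with g' clipped before squaring. A quick numerical check (illustrative, not part of the patch) with f(g) = g**2 and g(x) = exp(x), whose closed form is 4*exp(2x):

import numpy as np
from GPy.util.misc import chain_2

x = np.array([0.5])
g = dg = d2g = np.exp(x)          # g(x)=exp(x) equals all of its derivatives
df, d2f = 2*g, 2*np.ones_like(g)  # f(g)=g**2: f'(g)=2g, f''(g)=2
print(chain_2(d2f, dg, df, d2g))  # -> [ 10.873...]
print(4*np.exp(2*x))              # closed form, same value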