From 51e48f750859d6ecef3be8ad956558edcefb36d0 Mon Sep 17 00:00:00 2001
From: James Hensman
Date: Wed, 16 Jul 2014 09:40:44 +0100
Subject: [PATCH 1/6] fixed a bug in optimize restarts: it now used optimizer_array

---
 GPy/core/model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 3acb64b9..1595e347 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -61,7 +61,7 @@ class Model(Parameterized):
         on the current machine.
         """
-        initial_parameters = self.optimizer_array
+        initial_parameters = self.optimizer_array.copy()
 
         if parallel:
             try:
@@ -97,9 +97,9 @@ class Model(Parameterized):
 
         if len(self.optimization_runs):
             i = np.argmin([o.f_opt for o in self.optimization_runs])
-            self._set_params_transformed(self.optimization_runs[i].x_opt)
+            self.optimizer_array = self.optimization_runs[i].x_opt
         else:
-            self._set_params_transformed(initial_parameters)
+            self.optimizer_array = initial_parameters
 
     def ensure_default_constraints(self, warning=True):
         """

From 8c80fb9c52e679d427f279441f70381a170032c3 Mon Sep 17 00:00:00 2001
From: mzwiessele
Date: Tue, 22 Jul 2014 09:21:01 -0700
Subject: [PATCH 2/6] [inference] less constant jitter, and jitter adjustements

Conflicts:
	GPy/util/linalg.py
---
 GPy/core/sparse_gp.py | 8 ++++--
 .../latent_function_inference/var_dtc.py | 10 +------
 .../matplot_dep/dim_reduction_plots.py | 3 +-
 GPy/util/linalg.py | 28 ++++++++++++++-----
 4 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py
index 301d4b49..803aa29f 100644
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@@ -60,10 +60,14 @@ class SparseGP(GP):
         self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
         if isinstance(self.X, VariationalPosterior):
             #gradients wrt kernel
-            dL_dKmm = self.grad_dict.pop('dL_dKmm')
+            dL_dKmm = self.grad_dict['dL_dKmm']
             self.kern.update_gradients_full(dL_dKmm, self.Z, None)
             target = self.kern.gradient.copy()
-            self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
+            self.kern.update_gradients_expectations(variational_posterior=self.X,
+                                                    Z=self.Z,
+                                                    dL_dpsi0=self.grad_dict['dL_dpsi0'],
+                                                    dL_dpsi1=self.grad_dict['dL_dpsi1'],
+                                                    dL_dpsi2=self.grad_dict['dL_dpsi2'])
             self.kern.gradient += target
 
             #gradients wrt Z
diff --git a/GPy/inference/latent_function_inference/var_dtc.py b/GPy/inference/latent_function_inference/var_dtc.py
index 78f4b6f7..b3a09f8f 100644
--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@@ -194,7 +194,7 @@ class VarDTC(LatentFunctionInference):
         return post, log_marginal, grad_dict
 
 class VarDTCMissingData(LatentFunctionInference):
-    const_jitter = 1e-6
+    const_jitter = 1e-10
     def __init__(self, limit=1, inan=None):
         from ...util.caching import Cacher
         self._Y = Cacher(self._subarray_computations, limit)
@@ -289,13 +289,6 @@ class VarDTCMissingData(LatentFunctionInference):
         Lm = jitchol(Kmm)
         if uncertain_inputs:
             LmInv = dtrtri(Lm)
-        #VVT_factor_all = np.empty(Y.shape)
-        #full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
-        #if not full_VVT_factor:
-        #    psi1V = np.dot(Y.T*beta_all, psi1_all).T
-
-        #logger.info('computing dimension-wise likelihood and derivatives')
-        #size = len(Ys)
         size = Y.shape[1]
         next_ten = 0
         for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
@@ -348,7 +341,6 @@ class VarDTCMissingData(LatentFunctionInference):
                                    VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
                                    psi1, het_noise, uncertain_inputs)
-            #import ipdb;ipdb.set_trace()
             dL_dpsi0_all[v] += dL_dpsi0
             dL_dpsi1_all[v, :] += dL_dpsi1
             if uncertain_inputs:
diff --git a/GPy/plotting/matplot_dep/dim_reduction_plots.py b/GPy/plotting/matplot_dep/dim_reduction_plots.py
index 0ba082df..bac3dee0 100644
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@@ -30,7 +30,7 @@ def most_significant_input_dimensions(model, which_indices):
 def plot_latent(model, labels=None, which_indices=None,
                 resolution=50, ax=None, marker='o', s=40,
                 fignum=None, plot_inducing=False, legend=True,
-                plot_limits=None,
+                plot_limits=None, aspect='auto',
                 updates=False, predict_kwargs={}, imshow_kwargs={}):
     """
     :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
@@ -84,6 +84,7 @@ def plot_latent(model, labels=None, which_indices=None,
                        cmap=pb.cm.binary, **imshow_kwargs)
 
     # make sure labels are in order of input:
+    labels = np.asarray(labels)
     ulabels = []
     for lab in labels:
         if not lab in ulabels:
diff --git a/GPy/util/linalg.py b/GPy/util/linalg.py
index bb381665..517c0b52 100644
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@@ -15,6 +15,7 @@ import scipy
 import warnings
 import os
 from config import *
+import logging
 
 _scipyversion = np.float64((scipy.__version__).split('.')[:2])
 _fix_dpotri_scipy_bug = True
@@ -93,14 +94,20 @@ def jitchol(A, maxtries=5):
         raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
     jitter = diagA.mean() * 1e-6
     while maxtries > 0 and np.isfinite(jitter):
-        print 'Warning: adding jitter of {:.10e}'.format(jitter)
         try:
-            return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
+            L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
         except:
            jitter *= 10
         finally:
            maxtries -= 1
     raise linalg.LinAlgError, "not positive definite, even with jitter."
+    import traceback
+    try: raise
+    except:
+        logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
+            ' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
+    import ipdb;ipdb.set_trace()
+    return L
 
 
 
@@ -110,7 +117,7 @@ def jitchol(A, maxtries=5):
 # """
 # Wrapper for lapack dtrtri function
 # Inverse of L
-#
+# 
 # :param L: Triangular Matrix L
 # :param lower: is matrix lower (true) or upper (false)
 # :returns: Li, info
@@ -122,10 +129,17 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
     """
     Wrapper for lapack dtrtrs function
 
+    DTRTRS solves a triangular system of the form
+
+        A * X = B or A**T * X = B,
+
+    where A is a triangular matrix of order N, and B is an N-by-NRHS
+    matrix. A check is made to verify that A is nonsingular.
+
     :param A: Matrix A(triangular)
     :param B: Matrix B
     :param lower: is matrix lower (true) or upper (false)
-    :returns:
+    :returns: Solution to A * X = B or A**T * X = B
     """
     A = np.asfortranarray(A)
@@ -146,11 +160,11 @@ def dpotrs(A, B, lower=1):
 def dpotri(A, lower=1):
     """
     Wrapper for lapack dpotri function
-    
+
     DPOTRI - compute the inverse of a real symmetric positive
     definite matrix A using the Cholesky factorization
     A = U**T*U or A = L*L**T computed by DPOTRF
-    
+
     :param A: Matrix A
     :param lower: is matrix lower (true) or upper (false)
     :returns: A inverse
@@ -159,7 +173,7 @@ def dpotri(A, lower=1):
 
     if _fix_dpotri_scipy_bug:
         assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays"
        lower = 0
-    
+
     A = force_F_ordered(A)
     R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug

From ec5e9443ce8e4184ed85efeff09728b33b0426f5 Mon Sep 17 00:00:00 2001
From: Neil Lawrence
Date: Thu, 24 Jul 2014 20:55:18 +0100
Subject: [PATCH 3/6] Changes to datasets.py

---
 GPy/util/datasets.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/GPy/util/datasets.py b/GPy/util/datasets.py
index cc0cfc49..133a79e2 100644
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@@ -659,6 +659,22 @@ def ripley_synth(data_set='ripley_prnn_data'):
     ytest = test[:, 2:3]
     return data_details_return({'X': X, 'Y': y, 'Xtest': Xtest, 'Ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)
 
+def global_average_temperature(data_set='global_temperature', num_train=1000, refresh_data=False):
+    path = os.path.join(data_path, data_set)
+    if data_available(data_set) and not refresh_data:
+        print 'Using cached version of the data set, to use latest version set refresh_data to True'
+    else:
+        download_data(data_set)
+    data = np.loadtxt(os.path.join(data_path, data_set, 'GLBTS.long.data'))
+    print 'Most recent data observation from month ', data[-1, 1], ' in year ', data[-1, 0]
+    allX = data[data[:, 3]!=-99.99, 2:3]
+    allY = data[data[:, 3]!=-99.99, 3:4]
+    X = allX[:num_train, 0:1]
+    Xtest = allX[num_train:, 0:1]
+    Y = allY[:num_train, 0:1]
+    Ytest = allY[num_train:, 0:1]
+    return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set)
+
 def mauna_loa(data_set='mauna_loa', num_train=545, refresh_data=False):
     path = os.path.join(data_path, data_set)
     if data_available(data_set) and not refresh_data:

From ddcaf8f8b52f258492938a3e5cde07f6a181ad4d Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Tue, 29 Jul 2014 12:02:47 +0100
Subject: [PATCH 4/6] Added forced extraction of eggs (as we have a fair few non-py files and use the directory structure) added some files to MANIFEST and setup.py's package_data so its included upon distributing

---
 GPy/util/config.py | 4 ++--
 MANIFEST.in | 4 ++++
 setup.py | 9 ++++++---
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/GPy/util/config.py b/GPy/util/config.py
index 28007fdf..6dad46c8 100644
--- a/GPy/util/config.py
+++ b/GPy/util/config.py
@@ -8,9 +8,9 @@ config = ConfigParser.ConfigParser()
 
 # This is the default configuration file that always needs to be present.
 default_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'defaults.cfg'))
 
-# These files are optional
+# These files are optional 
 # This specifies configurations that are typically specific to the machine (it is found alongside the GPy installation).
-local_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'machine.cfg'))
+local_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'installation.cfg'))
 
 # This specifies configurations specific to the user (it is found in the user home directory)
 home = os.getenv('HOME') or os.getenv('USERPROFILE')
diff --git a/MANIFEST.in b/MANIFEST.in
index c89284cd..bcbf3583 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,7 @@ include *.txt
 recursive-include doc *.txt
 include *.md
 recursive-include doc *.md
+include *.cfg
+recursive-include doc *.cfg
+include *.json
+recursive-include doc *.json
diff --git a/setup.py b/setup.py
index ace1d8b2..a89dc926 100644
--- a/setup.py
+++ b/setup.py
@@ -1,14 +1,16 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import os
+#import os
 from setuptools import setup
 
 # Version number
 version = '0.4.6'
 
+from pkg_resources import Requirement, resource_string
 def read(fname):
-    return open(os.path.join(os.path.dirname(__file__), fname)).read()
+    return resource_string(Requirement.parse("GPy"),fname)
+    #return open(os.path.join(os.path.dirname(__file__), fname)).read()
 
 setup(name = 'GPy',
       version = version,
@@ -20,7 +22,7 @@ setup(name = 'GPy',
       url = "http://sheffieldml.github.com/GPy/",
       packages = ["GPy.models", "GPy.inference.optimization", "GPy.inference", "GPy.inference.latent_function_inference", "GPy.likelihoods", "GPy.mappings", "GPy.examples", "GPy.core.parameterization", "GPy.core", "GPy.testing", "GPy", "GPy.util", "GPy.kern", "GPy.kern._src.psi_comp", "GPy.kern._src", "GPy.plotting.matplot_dep.latent_space_visualizations.controllers", "GPy.plotting.matplot_dep.latent_space_visualizations", "GPy.plotting.matplot_dep", "GPy.plotting"],
       package_dir={'GPy': 'GPy'},
-      package_data = {'GPy': ['GPy/examples']},
+      package_data = {'GPy': ['defaults.cfg', 'installation.cfg', 'util/data_resources.json', 'util/football_teams.json']},
       py_modules = ['GPy.__init__'],
       long_description=read('README.md'),
       install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1', 'nose'],
@@ -29,4 +31,5 @@ setup(name = 'GPy',
       },
       classifiers=[
       "License :: OSI Approved :: BSD License"],
+      zip_safe = False
 )

From ece85f02fe8f83556866341532c530f91d3b5511 Mon Sep 17 00:00:00 2001
From: Alan Saul
Date: Tue, 29 Jul 2014 22:51:11 +0100
Subject: [PATCH 5/6] returned setup.py read to old version

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index a89dc926..40574a76 100644
--- a/setup.py
+++ b/setup.py
@@ -9,8 +9,7 @@ version = '0.4.6'
 
 from pkg_resources import Requirement, resource_string
 def read(fname):
-    return resource_string(Requirement.parse("GPy"),fname)
-    #return open(os.path.join(os.path.dirname(__file__), fname)).read()
+    return open(os.path.join(os.path.dirname(__file__), fname)).read()
 
 setup(name = 'GPy',
       version = version,

From 93b92111f821d611b08089a13c7a64ae612eb9ae Mon Sep 17 00:00:00 2001
From: mzwiessele
Date: Tue, 5 Aug 2014 08:27:40 -0700
Subject: [PATCH 6/6] [minor] minor changes

---
 GPy/core/model.py | 2 +-
 GPy/core/parameterization/parameter_core.py | 28 +--------------------
 GPy/kern/_src/add.py | 4 +--
 3 files changed, 4 insertions(+), 30 deletions(-)

diff --git a/GPy/core/model.py b/GPy/core/model.py
index 1595e347..7feb72b2 100644
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@@ -349,7 +349,7 @@ class Model(Parameterized):
             numerical_gradient = (f1 - f2) / (2 * step)
             if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
             else: ratio = (f1 - f2) / (2 * step * gradient[xind])
-            difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
+            difference = np.abs(numerical_gradient - gradient[xind])
 
             if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                 formatted_name = "\033[92m {0} \033[0m".format(names[nind])
diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py
index e359409e..2a036378 100644
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@@ -699,36 +699,10 @@ class OptimizationHandlable(Indexable):
     def _get_params_transformed(self):
         raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
-#        # transformed parameters (apply un-transformation rules)
-#        p = self.param_array.copy()
-#        [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-#        if self.has_parent() and self.constraints[__fixed__].size != 0:
-#            fixes = np.ones(self.size).astype(bool)
-#            fixes[self.constraints[__fixed__]] = FIXED
-#            return p[fixes]
-#        elif self._has_fixes():
-#            return p[self._fixes_]
-#        return p
 #
     def _set_params_transformed(self, p):
         raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
-#        """
-#        Set parameters p, but make sure they get transformed before setting.
-#        This means, the optimizer sees p, whereas the model sees transformed(p),
-#        such that, the parameters the model sees are in the right domain.
-#        """
-#        if not(p is self.param_array):
-#            if self.has_parent() and self.constraints[__fixed__].size != 0:
-#                fixes = np.ones(self.size).astype(bool)
-#                fixes[self.constraints[__fixed__]] = FIXED
-#                self.param_array.flat[fixes] = p
-#            elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
-#            else: self.param_array.flat = p
-#        [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
-#         for c, ind in self.constraints.iteritems() if c != __fixed__]
-#        self._trigger_params_changed()
 
     def _trigger_params_changed(self, trigger_parent=True):
         """
         First tell all children to update,
 
         If trigger_parent is True, we will tell the parent, otherwise not.
         """
-        [p._trigger_params_changed(trigger_parent=False) for p in self.parameters]
+        [p._trigger_params_changed(trigger_parent=False) for p in self.parameters if not p.is_fixed]
         self.notify_observers(None, None if trigger_parent else -np.inf)
 
     def _size_transformed(self):
diff --git a/GPy/kern/_src/add.py b/GPy/kern/_src/add.py
index 12f5d444..ee743f8b 100644
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@@ -10,7 +10,7 @@ class Add(CombinationKernel):
     """
     Add given list of kernels together.
    propagates gradients through.
-    
+
     This kernel will take over the active dims of it's subkernels passed in.
     """
     def __init__(self, subkerns, name='add'):
@@ -40,7 +40,7 @@ class Add(CombinationKernel):
         return reduce(np.add, (p.Kdiag(X) for p in which_parts))
 
     def update_gradients_full(self, dL_dK, X, X2=None):
-        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed]
 
     def update_gradients_diag(self, dL_dK, X):
         [p.update_gradients_diag(dL_dK, X) for p in self.parts]
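
Editor's note on PATCH 1/6: the sketch below is illustrative only and is not GPy code; the names optimize_restarts, objective and optimize are hypothetical stand-ins. It shows the idea behind the fix: copy the starting parameter vector before any restarts run, and afterwards restore the parameters of the run with the lowest objective (falling back to the original start if no run improved on it).

import numpy as np

def optimize_restarts(objective, optimize, x0, num_restarts=5, perturb=0.1, seed=0):
    """Run `optimize` from several perturbed copies of x0 and keep the best result."""
    rng = np.random.default_rng(seed)
    x0 = np.asarray(x0, dtype=float).copy()      # copy, so restarts cannot clobber the starting point
    best_x, best_f = x0, objective(x0)
    for i in range(num_restarts):
        start = x0 if i == 0 else x0 + perturb * rng.standard_normal(x0.shape)
        x_opt = optimize(start)                  # each restart returns an optimised parameter vector
        f_opt = objective(x_opt)
        if f_opt < best_f:                       # keep the run with the lowest objective value
            best_x, best_f = x_opt, f_opt
    return best_x, best_f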
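
Editor's note on PATCH 2/6: a minimal sketch of the adaptive-jitter Cholesky idea behind the jitchol change; it is not the GPy implementation (GPy's jitchol first tries the plain LAPACK factorisation and logs every jitter increase). Starting from a jitter proportional to the mean of the diagonal, the factorisation is retried with ten times more jitter until it succeeds or the retry budget runs out.

import numpy as np

def jitter_chol(A, maxtries=5):
    """Lower Cholesky factor of A, adding growing diagonal jitter if A is not quite positive definite."""
    A = np.asarray(A, dtype=float)
    jitter = np.diag(A).mean() * 1e-6
    for _ in range(maxtries):
        try:
            # np.linalg.cholesky returns the lower-triangular L with A = L L^T
            return np.linalg.cholesky(A + np.eye(A.shape[0]) * jitter)
        except np.linalg.LinAlgError:
            jitter *= 10.0                       # escalate the jitter and retry
    raise np.linalg.LinAlgError("not positive definite, even with jitter")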
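
Editor's note on PATCH 6/6: the GPy/core/model.py hunk changes the reported difference in the gradient check to reuse the already-computed central finite difference. The sketch below is a hypothetical check_gradient helper, not GPy's checkgrad, showing the comparison logic: a ratio test between the numerical and analytic gradients, backed up by an absolute-difference test for gradients close to zero.

import numpy as np

def check_gradient(f, grad, x, step=1e-6, tolerance=1e-3):
    """Return one boolean per parameter: does grad(x) match a central finite difference of f?"""
    x = np.asarray(x, dtype=float)
    analytic = np.asarray(grad(x), dtype=float)
    ok = np.empty(x.size, dtype=bool)
    for i in range(x.size):
        e = np.zeros_like(x)
        e[i] = step
        numerical = (f(x + e) - f(x - e)) / (2.0 * step)     # central difference along dimension i
        ratio = numerical / analytic[i] if analytic[i] != 0 else 1.0 + abs(numerical)
        difference = abs(numerical - analytic[i])
        ok[i] = abs(1.0 - ratio) < tolerance or difference < tolerance
    return ok

# Example: f(x) = sum(x**2) has gradient 2*x, so every entry should pass:
# check_gradient(lambda x: np.sum(x**2), lambda x: 2 * x, np.array([0.3, -1.2]))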