Merge branch 'devel' of github.com:SheffieldML/GPy into devel

2026-04-26 21:36:23 +02:00 · 2014-08-06 14:45:20 +01:00 · 2014-08-06 14:45:20 +01:00 · 140fdc8211
commit 140fdc8211
parent 128e894560 93b92111f8
11 changed files with 63 additions and 56 deletions
--- a/GPy/core/model.py
+++ b/GPy/core/model.py
@ -61,7 +61,7 @@ class Model(Parameterized):
        on the current machine.

        """
-        initial_parameters = self.optimizer_array
+        initial_parameters = self.optimizer_array.copy()

        if parallel:
            try:
@ -97,9 +97,9 @@ class Model(Parameterized):

        if len(self.optimization_runs):
            i = np.argmin([o.f_opt for o in self.optimization_runs])
-            self._set_params_transformed(self.optimization_runs[i].x_opt)
+            self.optimizer_array = self.optimization_runs[i].x_opt
        else:
-            self._set_params_transformed(initial_parameters)
+            self.optimizer_array = initial_parameters

    def ensure_default_constraints(self, warning=True):
        """
@ -349,7 +349,7 @@ class Model(Parameterized):
                numerical_gradient = (f1 - f2) / (2 * step)
                if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
                else: ratio = (f1 - f2) / (2 * step * gradient[xind])
-                difference = np.abs((f1 - f2) / 2 / step - gradient[xind])
+                difference = np.abs(numerical_gradient - gradient[xind])

                if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
                    formatted_name = "\033[92m {0} \033[0m".format(names[nind])
--- a/GPy/core/parameterization/parameter_core.py
+++ b/GPy/core/parameterization/parameter_core.py
@ -699,36 +699,10 @@ class OptimizationHandlable(Indexable):

    def _get_params_transformed(self):
        raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
-#         # transformed parameters (apply un-transformation rules)
-#         p = self.param_array.copy()
-#         [np.put(p, ind, c.finv(p[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
-#         if self.has_parent() and self.constraints[__fixed__].size != 0:
-#             fixes = np.ones(self.size).astype(bool)
-#             fixes[self.constraints[__fixed__]] = FIXED
-#             return p[fixes]
-#         elif self._has_fixes():
-#             return p[self._fixes_]
-#         return p
 #
    def _set_params_transformed(self, p):
        raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"

-#         """
-#         Set parameters p, but make sure they get transformed before setting.
-#         This means, the optimizer sees p, whereas the model sees transformed(p),
-#         such that, the parameters the model sees are in the right domain.
-#         """
-#         if not(p is self.param_array):
-#             if self.has_parent() and self.constraints[__fixed__].size != 0:
-#                 fixes = np.ones(self.size).astype(bool)
-#                 fixes[self.constraints[__fixed__]] = FIXED
-#                 self.param_array.flat[fixes] = p
-#             elif self._has_fixes(): self.param_array.flat[self._fixes_] = p
-#             else: self.param_array.flat = p
-#         [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
-#          for c, ind in self.constraints.iteritems() if c != __fixed__]
-#         self._trigger_params_changed()
-
    def _trigger_params_changed(self, trigger_parent=True):
        """
        First tell all children to update,
@ -736,7 +710,7 @@ class OptimizationHandlable(Indexable):

        If trigger_parent is True, we will tell the parent, otherwise not.
        """
-        [p._trigger_params_changed(trigger_parent=False) for p in self.parameters]
+        [p._trigger_params_changed(trigger_parent=False) for p in self.parameters if not p.is_fixed]
        self.notify_observers(None, None if trigger_parent else -np.inf)

    def _size_transformed(self):
--- a/GPy/core/sparse_gp.py
+++ b/GPy/core/sparse_gp.py
@ -60,10 +60,14 @@ class SparseGP(GP):
        self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
        if isinstance(self.X, VariationalPosterior):
            #gradients wrt kernel
-            dL_dKmm = self.grad_dict.pop('dL_dKmm')
+            dL_dKmm = self.grad_dict['dL_dKmm']
            self.kern.update_gradients_full(dL_dKmm, self.Z, None)
            target = self.kern.gradient.copy()
-            self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.grad_dict['dL_dpsi0'], dL_dpsi1=self.grad_dict['dL_dpsi1'], dL_dpsi2=self.grad_dict['dL_dpsi2'])
+            self.kern.update_gradients_expectations(variational_posterior=self.X,
+                                                    Z=self.Z,
+                                                    dL_dpsi0=self.grad_dict['dL_dpsi0'],
+                                                    dL_dpsi1=self.grad_dict['dL_dpsi1'],
+                                                    dL_dpsi2=self.grad_dict['dL_dpsi2'])
            self.kern.gradient += target

            #gradients wrt Z
--- a/GPy/inference/latent_function_inference/var_dtc.py
+++ b/GPy/inference/latent_function_inference/var_dtc.py
@ -194,7 +194,7 @@ class VarDTC(LatentFunctionInference):
        return post, log_marginal, grad_dict

 class VarDTCMissingData(LatentFunctionInference):
-    const_jitter = 1e-6
+    const_jitter = 1e-10
    def __init__(self, limit=1, inan=None):
        from ...util.caching import Cacher
        self._Y = Cacher(self._subarray_computations, limit)
@ -289,13 +289,6 @@ class VarDTCMissingData(LatentFunctionInference):
        Lm = jitchol(Kmm)
        if uncertain_inputs: LmInv = dtrtri(Lm)

-        #VVT_factor_all = np.empty(Y.shape)
-        #full_VVT_factor = VVT_factor_all.shape[1] == Y.shape[1]
-        #if not full_VVT_factor:
-        #    psi1V = np.dot(Y.T*beta_all, psi1_all).T
-
-        #logger.info('computing dimension-wise likelihood and derivatives')
-        #size = len(Ys)
        size = Y.shape[1]
        next_ten = 0
        for i, [y, v, trYYT] in enumerate(itertools.izip(Ys, self._inan.T, traces)):
@ -348,7 +341,6 @@ class VarDTCMissingData(LatentFunctionInference):
                VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
                psi1, het_noise, uncertain_inputs)

-            #import ipdb;ipdb.set_trace()
            dL_dpsi0_all[v] += dL_dpsi0
            dL_dpsi1_all[v, :] += dL_dpsi1
            if uncertain_inputs:
--- a/GPy/kern/_src/add.py
+++ b/GPy/kern/_src/add.py
@ -10,7 +10,7 @@ class Add(CombinationKernel):
    """
    Add given list of kernels together.
    propagates gradients through.
-    
+
    This kernel will take over the active dims of it's subkernels passed in.
    """
    def __init__(self, subkerns, name='add'):
@ -40,7 +40,7 @@ class Add(CombinationKernel):
        return reduce(np.add, (p.Kdiag(X) for p in which_parts))

    def update_gradients_full(self, dL_dK, X, X2=None):
-        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
+        [p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed]

    def update_gradients_diag(self, dL_dK, X):
        [p.update_gradients_diag(dL_dK, X) for p in self.parts]
--- a/GPy/plotting/matplot_dep/dim_reduction_plots.py
+++ b/GPy/plotting/matplot_dep/dim_reduction_plots.py
@ -30,7 +30,7 @@ def most_significant_input_dimensions(model, which_indices):
 def plot_latent(model, labels=None, which_indices=None,
                resolution=50, ax=None, marker='o', s=40,
                fignum=None, plot_inducing=False, legend=True,
-                plot_limits=None, 
+                plot_limits=None,
                aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
    """
    :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
@ -84,6 +84,7 @@ def plot_latent(model, labels=None, which_indices=None,
                            cmap=pb.cm.binary, **imshow_kwargs)

    # make sure labels are in order of input:
+    labels = np.asarray(labels)
    ulabels = []
    for lab in labels:
        if not lab in ulabels:
--- a/GPy/util/config.py
+++ b/GPy/util/config.py
@ -8,9 +8,9 @@ config = ConfigParser.ConfigParser()
 # This is the default configuration file that always needs to be present.
 default_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'defaults.cfg'))

-# These files are optional 
+# These files are optional
 # This specifies configurations that are typically specific to the machine (it is found alongside the GPy installation).
-local_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'machine.cfg'))
+local_file = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'installation.cfg'))

 # This specifies configurations specific to the user (it is found in the user home directory)
 home = os.getenv('HOME') or os.getenv('USERPROFILE')
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -659,6 +659,22 @@ def ripley_synth(data_set='ripley_prnn_data'):
    ytest = test[:, 2:3]
    return data_details_return({'X': X, 'Y': y, 'Xtest': Xtest, 'Ytest': ytest, 'info': 'Synthetic data generated by Ripley for a two class classification problem.'}, data_set)

+def global_average_temperature(data_set='global_temperature', num_train=1000, refresh_data=False):
+    path = os.path.join(data_path, data_set)
+    if data_available(data_set) and not refresh_data:
+        print 'Using cached version of the data set, to use latest version set refresh_data to True'
+    else:
+        download_data(data_set)
+    data = np.loadtxt(os.path.join(data_path, data_set, 'GLBTS.long.data'))
+    print 'Most recent data observation from month ', data[-1, 1], ' in year ', data[-1, 0]
+    allX = data[data[:, 3]!=-99.99, 2:3]
+    allY = data[data[:, 3]!=-99.99, 3:4]
+    X = allX[:num_train, 0:1]
+    Xtest = allX[num_train:, 0:1]
+    Y = allY[:num_train, 0:1]
+    Ytest = allY[num_train:, 0:1]
+    return data_details_return({'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Mauna Loa data with " + str(num_train) + " values used as training points."}, data_set)
+
 def mauna_loa(data_set='mauna_loa', num_train=545, refresh_data=False):
    path = os.path.join(data_path, data_set)
    if data_available(data_set) and not refresh_data:
--- a/GPy/util/linalg.py
+++ b/GPy/util/linalg.py
@ -15,6 +15,7 @@ import scipy
 import warnings
 import os
 from config import *
+import logging

 _scipyversion = np.float64((scipy.__version__).split('.')[:2])
 _fix_dpotri_scipy_bug = True
@ -93,14 +94,20 @@ def jitchol(A, maxtries=5):
            raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
        jitter = diagA.mean() * 1e-6
        while maxtries > 0 and np.isfinite(jitter):
-            print 'Warning: adding jitter of {:.10e}'.format(jitter)
            try:
-                return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
+                L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
            except:
                jitter *= 10
            finally:
                maxtries -= 1
        raise linalg.LinAlgError, "not positive definite, even with jitter."
+    import traceback
+    try: raise
+    except:
+        logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
+            '  in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
+    import ipdb;ipdb.set_trace()
+    return L



@ -110,7 +117,7 @@ def jitchol(A, maxtries=5):
 #     """
 #     Wrapper for lapack dtrtri function
 #     Inverse of L
-# 
+#
 #     :param L: Triangular Matrix L
 #     :param lower: is matrix lower (true) or upper (false)
 #     :returns: Li, info
@ -122,10 +129,17 @@ def dtrtrs(A, B, lower=1, trans=0, unitdiag=0):
    """
    Wrapper for lapack dtrtrs function

+    DTRTRS solves a triangular system of the form
+
+        A * X = B  or  A**T * X = B,
+
+    where A is a triangular matrix of order N, and B is an N-by-NRHS
+    matrix.  A check is made to verify that A is nonsingular.
+
    :param A: Matrix A(triangular)
    :param B: Matrix B
    :param lower: is matrix lower (true) or upper (false)
-    :returns:
+    :returns: Solution to A * X = B or A**T * X = B

    """
    A = np.asfortranarray(A)
@ -146,11 +160,11 @@ def dpotrs(A, B, lower=1):
 def dpotri(A, lower=1):
    """
    Wrapper for lapack dpotri function
-    
+
    DPOTRI - compute the inverse of a real symmetric positive
      definite matrix A using the Cholesky factorization A =
      U**T*U or A = L*L**T computed by DPOTRF
-      
+
    :param A: Matrix A
    :param lower: is matrix lower (true) or upper (false)
    :returns: A inverse
@ -159,7 +173,7 @@ def dpotri(A, lower=1):
    if _fix_dpotri_scipy_bug:
        assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays"
        lower = 0
-        
+
    A = force_F_ordered(A)
    R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug

--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -2,3 +2,7 @@ include *.txt
 recursive-include doc *.txt
 include *.md
 recursive-include doc *.md
+include *.cfg
+recursive-include doc *.cfg
+include *.json
+recursive-include doc *.json
--- a/setup.py
+++ b/setup.py
@ -1,12 +1,13 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

-import os
+#import os
 from setuptools import setup

 # Version number
 version = '0.4.6'

+from pkg_resources import Requirement, resource_string
 def read(fname):
    return open(os.path.join(os.path.dirname(__file__), fname)).read()

@ -20,7 +21,7 @@ setup(name = 'GPy',
      url = "http://sheffieldml.github.com/GPy/",
      packages = ["GPy.models", "GPy.inference.optimization", "GPy.inference", "GPy.inference.latent_function_inference", "GPy.likelihoods", "GPy.mappings", "GPy.examples", "GPy.core.parameterization", "GPy.core", "GPy.testing", "GPy", "GPy.util", "GPy.kern", "GPy.kern._src.psi_comp", "GPy.kern._src", "GPy.plotting.matplot_dep.latent_space_visualizations.controllers", "GPy.plotting.matplot_dep.latent_space_visualizations", "GPy.plotting.matplot_dep", "GPy.plotting"],
      package_dir={'GPy': 'GPy'},
-      package_data = {'GPy': ['GPy/examples']},
+      package_data = {'GPy': ['defaults.cfg', 'installation.cfg', 'util/data_resources.json', 'util/football_teams.json']},
      py_modules = ['GPy.__init__'],
      long_description=read('README.md'),
      install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1', 'nose'],
@ -29,4 +30,5 @@ setup(name = 'GPy',
      },
      classifiers=[
      "License :: OSI Approved :: BSD License"],
+      zip_safe = False
      )