diff --git a/.travis.yml b/.travis.yml index c40d73cb..3e8b9be5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,7 +19,7 @@ env: #- PYTHON_VERSION=3.4 - PYTHON_VERSION=3.5 - PYTHON_VERSION=3.6 - - PYTHON_VERSION=3.7 + #- PYTHON_VERSION=3.7 before_install: - wget https://github.com/mzwiessele/travis_scripts/raw/master/download_miniconda.sh diff --git a/GPy/core/gp.py b/GPy/core/gp.py index ab7e8f9d..977882ac 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -144,14 +144,14 @@ class GP(Model): return input_dict @staticmethod - def _build_from_input_dict(input_dict, data=None): + def _format_input_dict(input_dict, data=None): import GPy import numpy as np if (input_dict['X'] is None) or (input_dict['Y'] is None): assert(data is not None) input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1]) elif data is not None: - print("WARNING: The model has been saved with X,Y! The original values are being overriden!") + warnings.warn("WARNING: The model has been saved with X,Y! The original values are being overridden!") input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1]) else: input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y']) @@ -173,6 +173,11 @@ class GP(Model): input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer) else: input_dict["normalizer"] = normalizer + return input_dict + + @staticmethod + def _build_from_input_dict(input_dict, data=None): + input_dict = GP._format_input_dict(input_dict, data) return GP(**input_dict) def save_model(self, output_filename, compress=True, save_data=True): @@ -573,7 +578,7 @@ class GP(Model): mag[n] = np.sqrt(np.linalg.det(G[n, :, :])) return mag - def posterior_samples_f(self,X, size=10, full_cov=True, **predict_kwargs): + def posterior_samples_f(self,X, size=10, **predict_kwargs): """ Samples the posterior GP at the points X. @@ -581,35 +586,28 @@ class GP(Model): :type X: np.ndarray (Nnew x self.input_dim) :param size: the number of a posteriori samples. :type size: int. - :param full_cov: whether to return the full covariance matrix, or just the diagonal. - :type full_cov: bool. - :returns: fsim: set of simulations - :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension) + :returns: set of simulations + :rtype: np.ndarray (Nnew x D x samples) """ - m, v = self._raw_predict(X, full_cov=full_cov, **predict_kwargs) + m, v = self._raw_predict(X, full_cov=True, **predict_kwargs) if self.normalizer is not None: m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v) def sim_one_dim(m, v): - if not full_cov: - return np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T - else: - return np.random.multivariate_normal(m.flatten(), v, size).T + return np.random.multivariate_normal(m, v, size).T if self.output_dim == 1: - return sim_one_dim(m, v) + return sim_one_dim(m.flatten(), v)[:, np.newaxis, :] else: - fsim = np.empty((self.output_dim, X.shape[0], size)) + fsim = np.empty((X.shape[0], self.output_dim, size)) for d in range(self.output_dim): - if full_cov and v.ndim == 3: - fsim[d] = sim_one_dim(m[:, d], v[:, :, d]) - elif (not full_cov) and v.ndim == 2: - fsim[d] = sim_one_dim(m[:, d], v[:, d]) + if v.ndim == 3: + fsim[:, d, :] = sim_one_dim(m[:, d], v[:, :, d]) else: - fsim[d] = sim_one_dim(m[:, d], v) + fsim[:, d, :] = sim_one_dim(m[:, d], v) return fsim - def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None, likelihood=None, **predict_kwargs): + def posterior_samples(self, X, size=10, Y_metadata=None, likelihood=None, **predict_kwargs): """ Samples the posterior GP at the points X. @@ -617,19 +615,17 @@ class GP(Model): :type X: np.ndarray (Nnew x self.input_dim.) :param size: the number of a posteriori samples. :type size: int. - :param full_cov: whether to return the full covariance matrix, or just the diagonal. - :type full_cov: bool. :param noise_model: for mixed noise likelihood, the noise model to use in the samples. :type noise_model: integer. :returns: Ysim: set of simulations, :rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension) """ - fsim = self.posterior_samples_f(X, size, full_cov=full_cov, **predict_kwargs) + fsim = self.posterior_samples_f(X, size, **predict_kwargs) if likelihood is None: likelihood = self.likelihood if fsim.ndim == 3: - for d in range(fsim.shape[0]): - fsim[d] = likelihood.samples(fsim[d], Y_metadata=Y_metadata) + for d in range(fsim.shape[1]): + fsim[:, d] = likelihood.samples(fsim[:, d], Y_metadata=Y_metadata) else: fsim = likelihood.samples(fsim, Y_metadata=Y_metadata) return fsim @@ -650,7 +646,7 @@ class GP(Model): :param max_iters: maximum number of function evaluations :type max_iters: int - :messages: whether to display during optimisation + :param messages: whether to display during optimisation :type messages: bool :param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'. :type optimizer: string diff --git a/GPy/core/sparse_gp.py b/GPy/core/sparse_gp.py index eb3650af..d9439d28 100644 --- a/GPy/core/sparse_gp.py +++ b/GPy/core/sparse_gp.py @@ -130,37 +130,13 @@ class SparseGP(GP): input_dict["Z"] = self.Z.tolist() return input_dict + @staticmethod + def _format_input_dict(input_dict, data=None): + input_dict = GP._format_input_dict(input_dict, data) + input_dict["Z"] = np.array(input_dict["Z"]) + return input_dict + @staticmethod def _build_from_input_dict(input_dict, data=None): - # Called from the from_dict method. - import GPy - if (input_dict['X'] is None) or (input_dict['Y'] is None): - if data is None: - raise ValueError("The model was serialized whithout the training data. 'data' must be not None!") - input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1]) - elif data is not None: - print("WARNING: The model has been saved with X,Y! The original values are being overriden!") - input_dict["X"], input_dict["Y"] = np.array(data[0]), np.array(data[1]) - else: - input_dict["X"], input_dict["Y"] = np.array(input_dict['X']), np.array(input_dict['Y']) - - input_dict["Z"] = np.array(input_dict['Z']) - input_dict["kernel"] = GPy.kern.Kern.from_dict(input_dict["kernel"]) - input_dict["likelihood"] = GPy.likelihoods.likelihood.Likelihood.from_dict(input_dict["likelihood"]) - mean_function = input_dict.get("mean_function") - if mean_function is not None: - input_dict["mean_function"] = GPy.core.mapping.Mapping.from_dict(mean_function) - else: - input_dict["mean_function"] = mean_function - input_dict["inference_method"] = GPy.inference.latent_function_inference.LatentFunctionInference.from_dict(input_dict["inference_method"]) - - #FIXME: Assumes the Y_metadata is serializable. We should create a Metadata class - Y_metadata = input_dict.get("Y_metadata") - input_dict["Y_metadata"] = Y_metadata - - normalizer = input_dict.get("normalizer") - if normalizer is not None: - input_dict["normalizer"] = GPy.util.normalizer._Norm.from_dict(normalizer) - else: - input_dict["normalizer"] = normalizer + input_dict = SparseGP._format_input_dict(input_dict, data) return SparseGP(**input_dict) diff --git a/GPy/kern/__init__.py b/GPy/kern/__init__.py index 96abab39..1fedb314 100644 --- a/GPy/kern/__init__.py +++ b/GPy/kern/__init__.py @@ -34,6 +34,7 @@ from .src.splitKern import DEtime as DiffGenomeKern from .src.spline import Spline from .src.basis_funcs import LogisticBasisFuncKernel, LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel, PolynomialBasisFuncKernel from .src.grid_kerns import GridRBF +from .src.symmetric import Symmetric from .src.sde_matern import sde_Matern32 from .src.sde_matern import sde_Matern52 diff --git a/GPy/kern/src/symmetric.py b/GPy/kern/src/symmetric.py new file mode 100644 index 00000000..c7207023 --- /dev/null +++ b/GPy/kern/src/symmetric.py @@ -0,0 +1,170 @@ +import numpy as np + +from .kern import Kern + + +class Symmetric(Kern): + """ + Symmetric kernel that models a function with even or odd symmetry: + + For even symmetry we have: + + .. math:: + + f(x) = f(Ax) + + we then model the function as: + + .. math:: + + f(x) = g(x) + g(Ax) + + the corresponding kernel is: + + .. math:: + + k(x, x') + k(Ax, x') + k(x, Ax') + k(Ax, Ax') + + For odd symmetry we have: + + .. math:: + + f(x) = -f(Ax) + + it does this by modelling: + + .. math:: + + f(x) = g(x) - g(Ax) + + with kernel + + .. math:: + + k(x, x') - k(Ax, x') - k(x, Ax') + k(Ax, Ax') + + where k(x, x') is the kernel of g(x) + + :param base_kernel: kernel to make symmetric + :param transform: transformation matrix describing symmetry plane, A in equations above + :param symmetry_type: 'odd' or 'even' depending on the symmetry needed + """ + + def __init__(self, base_kernel, transform, symmetry_type='even'): + n_dims = max(base_kernel.active_dims) + 1 + super(Symmetric, self).__init__(n_dims, list(range(n_dims)), name='symmetric_kernel') + if symmetry_type is 'odd': + self.symmetry_sign = -1. + elif symmetry_type is 'even': + self.symmetry_sign = 1. + else: + raise ValueError('symmetry_type input must be ''odd'' or ''even''') + self.transform = transform + self.base_kernel = base_kernel + self.param_names = base_kernel.parameter_names() + self.link_parameters(self.base_kernel) + + def K(self, X, X2): + X_sym = X.dot(self.transform) + + if X2 is None: + X2 = X + X2_sym = X_sym + else: + X2_sym = X2.dot(self.transform) + + cross_term_x_ax = self.symmetry_sign * self.base_kernel.K(X, X2_sym) + + if X2 is None: + cross_term_ax_x = cross_term_x_ax.T + else: + cross_term_ax_x = self.symmetry_sign * \ + self.base_kernel.K(X_sym, X2) + + return (self.base_kernel.K(X, X2) + cross_term_x_ax + cross_term_ax_x + + self.base_kernel.K(X_sym, X2_sym)) + + def Kdiag(self, X): + n_points = X.shape[0] + X_sym = X.dot(self.transform) + + # Evaluate cross terms in batches, taking the diag of a larger matrix + # is wasteful, but is more efficient than calling kernel.K for each data point + batch_size = 100 + n_batches = int(np.ceil(n_points / float(batch_size))) + cross_term = np.zeros(X.shape[0]) + for i in range(n_batches): + i_start = i * batch_size + i_end = np.min([(i + 1) * batch_size, n_points]) + cross_term[i_start:i_end] = np.diag(self.base_kernel.K( + X_sym[i_start:i_end, :], X[i_start:i_end, :])) + + return self.base_kernel.Kdiag(X) + 2 * self.symmetry_sign * cross_term + self.base_kernel.Kdiag(X_sym) + + def update_gradients_full(self, dL_dK, X, X2): + X_sym = X.dot(self.transform) + if X2 is None: + X2 = X + X2_sym = X2.dot(self.transform) + + # Get gradients from base kernel one term at a time + self.base_kernel.update_gradients_full(dL_dK, X_sym, X2) + gradient = self.symmetry_sign * self.base_kernel.gradient.copy() + + self.base_kernel.update_gradients_full(dL_dK, X, X2_sym) + gradient += self.symmetry_sign * self.base_kernel.gradient.copy() + + self.base_kernel.update_gradients_full(dL_dK, X_sym, X2_sym) + gradient += self.base_kernel.gradient.copy() + + self.base_kernel.update_gradients_full(dL_dK, X, X2) + gradient += self.base_kernel.gradient.copy() + + # Set gradients + self.base_kernel.gradient = gradient + + def update_gradients_diag(self, dL_dK, X): + + dL_dK_full = np.diag(dL_dK) + X_sym = X.dot(self.transform) + + # Calculate gradient for k(Ax, Ax') + self.base_kernel.update_gradients_diag(dL_dK, X_sym) + gradient = self.base_kernel.gradient.copy() + + # Calculate gradient for k(x, x') + self.base_kernel.update_gradients_diag(dL_dK, X) + gradient += self.base_kernel.gradient.copy() + + # Batch process cross term for speed + batch_size = 100 + n_points = dL_dK.shape[0] + n_batches = int(np.ceil(n_points / float(batch_size))) + gradient_part = np.zeros(gradient.shape) + for i in range(n_batches): + i_start = i * batch_size + i_end = np.min([(i + 1) * batch_size, n_points]) + dL_dK_part = dL_dK_full[i_start:i_end, i_start:i_end] + X_part = X[i_start:i_end, :] + X_sym_part = X_sym[i_start:i_end, :] + self.base_kernel.update_gradients_full( + dL_dK_part, X_part, X_sym_part) + gradient_part += self.base_kernel.gradient.copy() + + gradient += 2 * self.symmetry_sign * gradient_part + + self.base_kernel.gradient = gradient + + def gradients_X(self, dL_dK, X, X2): + X_sym = X.dot(self.transform) + if X2 is None: + X2 = X + X2_sym = X.dot(self.transform) + dL_dK = dL_dK + dL_dK.T + else: + X2_sym = X2.dot(self.transform) + + return (self.base_kernel.gradients_X(dL_dK, X, X2) + + self.base_kernel.gradients_X(dL_dK, X_sym, X2_sym).dot(self.transform.T) + + self.symmetry_sign * self.base_kernel.gradients_X(dL_dK, X, X2_sym) + + self.symmetry_sign * self.base_kernel.gradients_X(dL_dK, X_sym, X2).dot(self.transform.T)) diff --git a/GPy/models/gp_classification.py b/GPy/models/gp_classification.py index 702d2105..8a68e74d 100644 --- a/GPy/models/gp_classification.py +++ b/GPy/models/gp_classification.py @@ -16,18 +16,27 @@ class GPClassification(GP): :param X: input observations :param Y: observed values, can be None if likelihood is not None :param kernel: a GPy kernel, defaults to rbf + :param likelihood: a GPy likelihood, defaults to Bernoulli + :param inference_method: Latent function inference to use, defaults to EP + :type inference_method: :class:`GPy.inference.latent_function_inference.LatentFunctionInference` .. Note:: Multiple independent outputs are allowed using columns of Y """ - def __init__(self, X, Y, kernel=None,Y_metadata=None, mean_function=None): + def __init__(self, X, Y, kernel=None,Y_metadata=None, mean_function=None, inference_method=None, + likelihood=None, normalizer=False): if kernel is None: kernel = kern.RBF(X.shape[1]) - likelihood = likelihoods.Bernoulli() + if likelihood is None: + likelihood = likelihoods.Bernoulli() - GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=EP(), mean_function=mean_function, name='gp_classification') + if inference_method is None: + inference_method = EP() + + GP.__init__(self, X=X, Y=Y, kernel=kernel, likelihood=likelihood, inference_method=inference_method, + mean_function=mean_function, name='gp_classification', normalizer=normalizer) @staticmethod def from_gp(gp): @@ -48,3 +57,9 @@ class GPClassification(GP): def save_model(self, output_filename, compress=True, save_data=True): self._save_model(output_filename, compress=True, save_data=True) + + @staticmethod + def _build_from_input_dict(input_dict, data=None): + input_dict = GPClassification._format_input_dict(input_dict, data) + input_dict.pop('name', None) # Name parameter not required by GPClassification + return GPClassification(**input_dict) diff --git a/GPy/models/sparse_gp_classification.py b/GPy/models/sparse_gp_classification.py index 0e44966a..296b70f4 100644 --- a/GPy/models/sparse_gp_classification.py +++ b/GPy/models/sparse_gp_classification.py @@ -17,8 +17,10 @@ class SparseGPClassification(SparseGP): :param X: input observations :param Y: observed values - :param likelihood: a GPy likelihood, defaults to Binomial with probit link_function + :param likelihood: a GPy likelihood, defaults to Bernoulli :param kernel: a GPy kernel, defaults to rbf+white + :param inference_method: Latent function inference to use, defaults to EPDTC + :type inference_method: :class:`GPy.inference.latent_function_inference.LatentFunctionInference` :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales) :type normalize_X: False|True :param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales) @@ -27,11 +29,13 @@ class SparseGPClassification(SparseGP): """ - def __init__(self, X, Y=None, likelihood=None, kernel=None, Z=None, num_inducing=10, Y_metadata=None): + def __init__(self, X, Y=None, likelihood=None, kernel=None, Z=None, num_inducing=10, Y_metadata=None, + mean_function=None, inference_method=None, normalizer=False): if kernel is None: kernel = kern.RBF(X.shape[1]) - likelihood = likelihoods.Bernoulli() + if likelihood is None: + likelihood = likelihoods.Bernoulli() if Z is None: i = np.random.permutation(X.shape[0])[:num_inducing] @@ -39,7 +43,11 @@ class SparseGPClassification(SparseGP): else: assert Z.shape[1] == X.shape[1] - SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=EPDTC(), name='SparseGPClassification',Y_metadata=Y_metadata) + if inference_method is None: + inference_method = EPDTC() + + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, mean_function=mean_function, inference_method=inference_method, + normalizer=normalizer, name='SparseGPClassification', Y_metadata=Y_metadata) @staticmethod def from_sparse_gp(sparse_gp): @@ -58,6 +66,12 @@ class SparseGPClassification(SparseGP): model_dict["class"] = "GPy.models.SparseGPClassification" return model_dict + @staticmethod + def _build_from_input_dict(input_dict, data=None): + input_dict = SparseGPClassification._format_input_dict(input_dict, data) + input_dict.pop('name', None) # Name parameter not required by SparseGPClassification + return SparseGPClassification(**input_dict) + @staticmethod def from_dict(input_dict, data=None): """ diff --git a/GPy/testing/kernel_tests.py b/GPy/testing/kernel_tests.py index e1c9d934..02186c62 100644 --- a/GPy/testing/kernel_tests.py +++ b/GPy/testing/kernel_tests.py @@ -482,7 +482,19 @@ class KernelGradientTestsContinuous(unittest.TestCase): k = GPy.kern.StdPeriodic(self.D) k.randomize() self.assertTrue(check_kernel_gradient_functions(k, X=self.X, X2=self.X2, verbose=verbose)) - + + def test_symmetric_even(self): + k_base = GPy.kern.Linear(1) + GPy.kern.RBF(1) + transform = -np.array([[1.0]]) + k = GPy.kern.Symmetric(k_base, transform, 'even') + self.assertTrue(check_kernel_gradient_functions(k)) + + def test_symmetric_odd(self): + k_base = GPy.kern.Linear(1) + GPy.kern.RBF(1) + transform = -np.array([[1.0]]) + k = GPy.kern.Symmetric(k_base, transform, 'odd') + self.assertTrue(check_kernel_gradient_functions(k)) + def test_MultioutputKern(self): k1 = GPy.kern.RBF(self.D, ARD=True) k1.randomize() diff --git a/GPy/testing/serialization_tests.py b/GPy/testing/serialization_tests.py index 57781bf3..d147d59c 100644 --- a/GPy/testing/serialization_tests.py +++ b/GPy/testing/serialization_tests.py @@ -237,7 +237,9 @@ class Test(unittest.TestCase): m.save_model("temp_test_gp_classifier_with_data.json", compress=True, save_data=True) m.save_model("temp_test_gp_classifier_without_data.json", compress=True, save_data=False) m1_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_with_data.json.zip") + self.assertTrue(type(m) == type(m1_r), "Incorrect model type. Expected: {} Actual: {}".format(type(m), type(m1_r))) m2_r = GPy.models.GPClassification.load_model("temp_test_gp_classifier_without_data.json.zip", (X,Y)) + self.assertTrue(type(m) == type(m2_r), "Incorrect model type. Expected: {} Actual: {}".format(type(m), type(m2_r))) os.remove("temp_test_gp_classifier_with_data.json.zip") os.remove("temp_test_gp_classifier_without_data.json.zip") @@ -259,7 +261,9 @@ class Test(unittest.TestCase): m.save_model("temp_test_sparse_gp_classifier_with_data.json", compress=True, save_data=True) m.save_model("temp_test_sparse_gp_classifier_without_data.json", compress=True, save_data=False) m1_r = GPy.models.SparseGPClassification.load_model("temp_test_sparse_gp_classifier_with_data.json.zip") + self.assertTrue(type(m) == type(m1_r), "Incorrect model type. Expected: {} Actual: {}".format(type(m), type(m1_r))) m2_r = GPy.models.SparseGPClassification.load_model("temp_test_sparse_gp_classifier_without_data.json.zip", (X,Y)) + self.assertTrue(type(m) == type(m2_r), "Incorrect model type. Expected: {} Actual: {}".format(type(m), type(m2_r))) os.remove("temp_test_sparse_gp_classifier_with_data.json.zip") os.remove("temp_test_sparse_gp_classifier_without_data.json.zip") diff --git a/appveyor.yml b/appveyor.yml index 16b649c9..2d1aca22 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,7 +12,7 @@ environment: - PYTHON_VERSION: 3.6 MINICONDA: C:\Miniconda36-x64 - PYTHON_VERSION: 3.7 - MINICONDA: C:\Miniconda37-x64 + MINICONDA: C:\Miniconda36-x64 #configuration: # - Debug