mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-09 20:12:38 +02:00
284 lines
10 KiB
Python
284 lines
10 KiB
Python
# ===============================================================================
|
|
# Copyright (c) 2016, Max Zwiessele, Alan Saul
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright notice, this
|
|
# list of conditions and the following disclaimer.
|
|
#
|
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
|
# this list of conditions and the following disclaimer in the documentation
|
|
# and/or other materials provided with the distribution.
|
|
#
|
|
# * Neither the name of GPy.testing.util_tests nor the names of its
|
|
# contributors may be used to endorse or promote products derived from
|
|
# this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
# ===============================================================================
|
|
|
|
import numpy as np
|
|
import GPy
|
|
|
|
|
|
class UtilTest:
    """Tests for assorted ``GPy.util`` helpers and the plotting ``fixed_inputs`` utility.

    NOTE(review): pytest's default ``python_classes`` pattern is ``Test*``;
    confirm that the project's pytest configuration collects a class named
    ``UtilTest``, otherwise none of these tests run.
    """

    @staticmethod
    def _fixed_input_problems(fixed, expected, free_dim=1):
        """Describe every way ``fixed`` deviates from what a test expects.

        :param fixed: list of ``(dimension, value)`` tuples, as returned by
            ``fixed_inputs(..., as_list=True)``.
        :param expected: iterable of ``(dimension, value)`` tuples that must
            all be contained in ``fixed``.
        :param free_dim: index of the input that was left free and therefore
            must not show up in ``fixed``.
        :returns: list of human readable problem descriptions; empty when
            ``fixed`` is as expected.
        """
        problems = [
            "missing fixed pair %r" % (pair,) for pair in expected if pair not in fixed
        ]
        # Unfixed input should not be in fixed
        problems.extend(
            "free input %r was fixed: %r" % (free_dim, entry)
            for entry in fixed
            if entry[0] == free_dim
        )
        return problems

    @staticmethod
    def _regression_model():
        """Build a small random ``GPRegression`` model.

        :returns: tuple ``(X, model)`` with the (10, 3) inputs and the model
            fitted to ``sin(X)`` plus a little noise.
        """
        X = np.random.randn(10, 3)
        Y = np.sin(X) + np.random.randn(10, 3) * 1e-3
        return X, GPy.models.GPRegression(X, Y)

    def test_checkFinite(self):
        """checkFinite accepts finite arrays and rejects arrays holding NaNs."""
        from GPy.util.debug import checkFinite

        array = np.random.normal(0, 1, 100).reshape(25, 4)
        assert checkFinite(array, name="test")

        array[np.random.binomial(1, 0.3, array.shape).astype(bool)] = np.nan
        # Guarantee at least one NaN even if the random mask came out empty.
        array[0, 0] = np.nan
        assert not checkFinite(array)

    def test_checkFullRank(self):
        """checkFullRank rejects a rank deficient matrix and accepts a full rank one."""
        from GPy.util.debug import checkFullRank
        from GPy.util.linalg import tdot

        # The test expects tdot of a (25, 4) array to be reported as rank deficient.
        array = np.random.normal(0, 1, 100).reshape(25, 4)
        assert not checkFullRank(tdot(array), name="test")

        array = np.random.normal(0, 1, (25, 25))
        assert checkFullRank(tdot(array))

    def test_fixed_inputs_median(self):
        """test fixed_inputs convenience function"""
        from GPy.plotting.matplot_dep.util import fixed_inputs

        X, m = self._regression_model()
        fixed = fixed_inputs(m, [1], fix_routine="median", as_list=True, X_all=False)
        expected = [(0, np.median(X[:, 0])), (2, np.median(X[:, 2]))]
        assert not self._fixed_input_problems(fixed, expected)

    def test_fixed_inputs_mean(self):
        """fixed_inputs with the ``mean`` routine fixes inputs 0 and 2 to their means."""
        from GPy.plotting.matplot_dep.util import fixed_inputs

        X, m = self._regression_model()
        fixed = fixed_inputs(m, [1], fix_routine="mean", as_list=True, X_all=False)
        expected = [(0, np.mean(X[:, 0])), (2, np.mean(X[:, 2]))]
        assert not self._fixed_input_problems(fixed, expected)

    def test_fixed_inputs_zero(self):
        """fixed_inputs with the ``zero`` routine fixes inputs 0 and 2 to 0.0."""
        from GPy.plotting.matplot_dep.util import fixed_inputs

        _, m = self._regression_model()
        fixed = fixed_inputs(m, [1], fix_routine="zero", as_list=True, X_all=False)
        assert not self._fixed_input_problems(fixed, [(0, 0.0), (2, 0.0)])

    def test_fixed_inputs_uncertain(self):
        """fixed_inputs uses the posterior means when the model inputs are uncertain."""
        from GPy.plotting.matplot_dep.util import fixed_inputs
        from GPy.core.parameterization.variational import NormalPosterior

        X_mu = np.random.randn(10, 3)
        # Variances have to be positive; a standard normal draw would make
        # roughly half of them negative.
        X_var = np.random.uniform(0.1, 1.0, (10, 3))
        X = NormalPosterior(X_mu, X_var)
        Y = np.sin(X_mu) + np.random.randn(10, 3) * 1e-3
        m = GPy.models.BayesianGPLVM(Y, X=X_mu, X_variance=X_var, input_dim=3)
        fixed = fixed_inputs(m, [1], fix_routine="median", as_list=True, X_all=False)
        means = X.mean.values
        expected = [(0, np.median(means[:, 0])), (2, np.median(means[:, 2]))]
        assert not self._fixed_input_problems(fixed, expected)

    def test_DSYR(self):
        """The symmetric rank-1 update ``A + alpha * b b^T`` matches a hand computed result."""
        from GPy.util.linalg import DSYR, DSYR_numpy

        base = np.arange(9.0).reshape(3, 3)
        base = np.dot(base.T, base)
        b = np.ones(3, dtype=float)
        alpha = 1.0
        R = np.array([[46, 55, 64], [55, 67, 79], [64, 79, 94]])
        # NOTE(review): DSYR_numpy was imported but never exercised; it is
        # assumed to share DSYR's (A, x, alpha) in-place contract - confirm.
        for rank_one_update in (DSYR, DSYR_numpy):
            A = base.copy()
            rank_one_update(A, b, alpha)
            # Compare element-wise: summing the differences would let
            # positive and negative errors cancel each other out.
            assert np.abs(A - R).max() < 1e-12

    def test_subarray(self):
        """common_subarrays groups the indices of identical columns."""
        X = np.zeros((3, 6), dtype=bool)
        X[[1, 1, 1], [0, 4, 5]] = 1
        X[1:, [2, 3]] = 1
        d = GPy.util.subarray_and_sorting.common_subarrays(X, axis=1)
        assert len(d) == 3
        # Every column grouped with column 0 has to be identical to column 0.
        grouped = X[:, d[tuple(X[:, 0])]]
        assert (grouped == X[:, [0]]).all()
        assert d[tuple(X[:, 4])] == d[tuple(X[:, 0])] == [0, 4, 5]
        assert d[tuple(X[:, 1])] == [1]

    def test_offset_cluster(self):
        """Offset clustering recovers the two known groups of time series."""
        # Tests the GPy.util.cluster_with_offset.cluster utility with a small
        # test data set. Not using random noise just in case it occasionally
        # causes it not to cluster correctly.
        # groundtruth cluster identifiers are: [0,1,1,0]

        # data contains a list of the four sets of time series (3 per data point)
        data = [
            np.array(
                [
                    [2.18094245, 1.96529789, 2.00265523, 2.18218742, 2.06795428],
                    [1.62254829, 1.75748448, 1.83879347, 1.87531326, 1.52503496],
                    [1.54589609, 1.61607914, 2.00463192, 1.48771394, 1.63339218],
                ]
            ),
            np.array(
                [
                    [2.86766106, 2.97953437, 2.91958876, 2.92510506, 3.03239241],
                    [2.57368423, 2.59954886, 3.10000395, 2.75806125, 2.89865704],
                    [2.58916318, 2.53698259, 2.63858411, 2.63102504, 2.51853901],
                ]
            ),
            np.array(
                [
                    [2.77834168, 2.9618564, 2.88482141, 3.24259745, 2.9716821],
                    [2.60675576, 2.67095624, 2.94824436, 2.80520631, 2.87247516],
                    [2.49543562, 2.5492281, 2.6505866, 2.65015308, 2.59738616],
                ]
            ),
            np.array(
                [
                    [1.76783086, 2.21666738, 2.07939706, 1.9268263, 2.23360121],
                    [1.94305547, 1.94648592, 2.1278921, 2.09481457, 2.08575238],
                    [1.69336013, 1.72285186, 1.6339506, 1.61212022, 1.39198698],
                ]
            ),
        ]

        # inputs contains their associated X values
        inputs = [
            np.array([[0.0], [0.68040097], [1.20316795], [1.798749], [2.14891733]]),
            np.array([[0.0], [0.51910637], [0.98259352], [1.57442965], [1.82515098]]),
            np.array([[0.0], [0.66645478], [1.59464591], [1.69769551], [1.80932752]]),
            np.array([[0.0], [0.87512108], [1.71881079], [2.67162871], [3.23761907]]),
        ]

        # try doing the clustering
        active = GPy.util.cluster_with_offset.cluster(data, inputs)
        # check to see that the clustering has correctly clustered the time series.
        clusters = {frozenset(cluster) for cluster in active}
        assert frozenset({1, 2}) in clusters, "Offset Clustering algorithm failed"
        assert frozenset({0, 3}) in clusters, "Offset Clustering algorithm failed"
|
|
|
|
|
|
class TestUnivariateGaussian:
|
|
def setup(self):
|
|
self.zz = [-5.0, -0.8, 0.0, 0.5, 2.0, 10.0]
|
|
|
|
def test_logPdfNormal(self):
|
|
from GPy.util.univariate_Gaussian import logPdfNormal
|
|
|
|
self.setup()
|
|
|
|
pySols = [
|
|
-13.4189385332,
|
|
-1.2389385332,
|
|
-0.918938533205,
|
|
-1.0439385332,
|
|
-2.9189385332,
|
|
-50.9189385332,
|
|
]
|
|
diff = 0.0
|
|
for i in range(len(pySols)):
|
|
diff += abs(logPdfNormal(self.zz[i]) - pySols[i])
|
|
assert diff < 1e-10
|
|
|
|
def test_cdfNormal(self):
|
|
from GPy.util.univariate_Gaussian import cdfNormal
|
|
|
|
self.setup()
|
|
|
|
pySols = [
|
|
2.86651571879e-07,
|
|
0.211855398583,
|
|
0.5,
|
|
0.691462461274,
|
|
0.977249868052,
|
|
1.0,
|
|
]
|
|
diff = 0.0
|
|
for i in range(len(pySols)):
|
|
diff += abs(cdfNormal(self.zz[i]) - pySols[i])
|
|
assert diff < 1e-10
|
|
|
|
def test_logCdfNormal(self):
|
|
from GPy.util.univariate_Gaussian import logCdfNormal
|
|
|
|
self.setup()
|
|
|
|
pySols = [
|
|
-15.064998394,
|
|
-1.55185131919,
|
|
-0.69314718056,
|
|
-0.368946415289,
|
|
-0.023012909329,
|
|
0.0,
|
|
]
|
|
diff = 0.0
|
|
for i in range(len(pySols)):
|
|
diff += abs(logCdfNormal(self.zz[i]) - pySols[i])
|
|
assert diff < 1e-10
|
|
|
|
def test_derivLogCdfNormal(self):
|
|
from GPy.util.univariate_Gaussian import derivLogCdfNormal
|
|
|
|
self.setup()
|
|
|
|
pySols = [
|
|
5.18650396941,
|
|
1.3674022693,
|
|
0.79788456081,
|
|
0.50916043387,
|
|
0.0552478626962,
|
|
0.0,
|
|
]
|
|
diff = 0.0
|
|
for i in range(len(pySols)):
|
|
diff += abs(derivLogCdfNormal(self.zz[i]) - pySols[i])
|
|
assert diff < 1e-8
|
|
|
|
|
|
class TestStandardize:
    """Checks for the ``GPy.util.normalizer.Standardize`` normalizer."""

    def setup(self):
        """Create a normalizer and scale it by random two-column targets.

        Called explicitly at the start of each test.
        """
        standardizer = GPy.util.normalizer.Standardize()
        columns = [np.random.randn(10), 2 * np.random.randn(10)]
        targets = np.stack(columns, axis=1)
        standardizer.scale_by(targets)
        self.normalizer = standardizer

    def test_inverse_covariance(self):
        """
        Test inverse covariance outputs correct size
        """
        self.setup()
        n_points = 100
        cov = np.random.rand(n_points, n_points)
        restored = self.normalizer.inverse_covariance(cov)
        # One covariance slice per target column the normalizer was scaled by.
        assert restored.shape == (n_points, n_points, 2)
|