This commit is contained in:
mzwiessele 2016-08-03 12:54:05 +01:00
commit 9319a1dfc6
53 changed files with 126 additions and 47 deletions

View file

@ -211,6 +211,12 @@ class Kern(Parameterized):
def input_sensitivity(self, summarize=True):
"""
Returns the sensitivity for each dimension of this kernel.
This is an arbitrary measurement based on the parameters
of the kernel per dimension and scaling in general.
Use this as relative measurement, not for absolute comparison between
kernels.
"""
return np.zeros(self.input_dim)

View file

@ -99,7 +99,7 @@ class Prod(CombinationKernel):
def input_sensitivity(self, summarize=True):
if summarize:
i_s = np.zeros((self.input_dim))
i_s = np.ones((self.input_dim))
for k in self.parts:
i_s[k._all_dims_active] *= k.input_sensitivity(summarize)
return i_s

View file

@ -51,6 +51,10 @@ class Stationary(Kern):
The lengthscale(s) and variance parameters are added to the structure automatically.
Thanks to @strongh:
In Stationary, a covariance function is defined in GPy as stationary when it depends only on the l2-norm |x_1 - x_2 |.
However this is the typical definition of isotropy, while stationarity is usually a bit more relaxed.
The more common version of stationarity is that the covariance is a function of x_1 - x_2 (See e.g. R&W first paragraph of section 4.1).
"""
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):

View file

@ -23,9 +23,10 @@ class Additive(Mapping):
assert(mapping1.input_dim==mapping2.input_dim)
assert(mapping1.output_dim==mapping2.output_dim)
input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
super(Additive, self).__init__(input_dim=input_dim, output_dim=output_dim)
self.mapping1 = mapping1
self.mapping2 = mapping2
self.link_parameters(self.mapping1, self.mapping2)
def f(self, X):
return self.mapping1.f(X) + self.mapping2.f(X)

View file

@ -33,7 +33,7 @@ class Linear(Mapping):
return np.dot(X, self.A)
def update_gradients(self, dL_dF, X):
self.A.gradient = np.dot( X.T, dL_dF)
self.A.gradient = np.dot(X.T, dL_dF)
def gradients_X(self, dL_dF, X):
return np.dot(dL_dF, self.A.T)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -28,10 +28,49 @@ class MFtests(unittest.TestCase):
A linear mean function with parameters that we'll learn alongside the kernel
"""
X = np.linspace(-1,10,50).reshape(-1,1)
Y = 3-np.abs((X-6))
Y += .5*np.cos(3*X) + 0.3*np.random.randn(*X.shape)
mf = GPy.mappings.PiecewiseLinear(1, 1, [-1,1], [9,2])
k =GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
self.assertTrue(m.checkgrad())
def test_parametric_mean_function_composition(self):
"""
A linear mean function with parameters that we'll learn alongside the kernel
"""
X = np.linspace(0,10,50).reshape(-1,1)
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
mf = GPy.mappings.Linear(1,1)
mf = GPy.mappings.Compound(GPy.mappings.Linear(1,1),
GPy.mappings.Kernel(1, 1, np.random.normal(0,1,(1,1)),
GPy.kern.RBF(1))
)
k =GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
self.assertTrue(m.checkgrad())
def test_parametric_mean_function_additive(self):
"""
A linear mean function with parameters that we'll learn alongside the kernel
"""
X = np.linspace(0,10,50).reshape(-1,1)
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
mf = GPy.mappings.Additive(GPy.mappings.Constant(1,1,3),
GPy.mappings.Additive(GPy.mappings.MLP(1,1),
GPy.mappings.Identity(1,1)
)
)
k =GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()

View file

@ -74,12 +74,13 @@ except ImportError:
extensions = ['npz']
basedir = os.path.dirname(os.path.relpath(os.path.abspath(__file__)))
def _image_directories():
"""
Compute the baseline and result image directories for testing *func*.
Create the result directory if it doesn't exist.
"""
basedir = os.path.dirname(os.path.relpath(os.path.abspath(__file__)))
#module_name = __init__.__module__
#mods = module_name.split('.')
#basedir = os.path.join(*mods)
@ -349,7 +350,9 @@ def test_sparse():
m = GPy.models.SparseGPRegression(X, Y, X_variance=np.ones_like(X)*0.1)
#m.optimize()
#m.plot_inducing()
m.plot_data()
_, ax = plt.subplots()
m.plot_data(ax=ax)
m.plot_data_error(ax=ax)
for do_test in _image_comparison(baseline_images=['sparse_gp_{}'.format(sub) for sub in ['data_error']], extensions=extensions):
yield (do_test, )
@ -397,31 +400,39 @@ def test_sparse_classification():
yield (do_test, )
def test_gplvm():
from ..examples.dimensionality_reduction import _simulate_matern
from ..kern import RBF
from ..models import GPLVM
from GPy.models import GPLVM
np.random.seed(12345)
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
#matplotlib.rcParams[u'figure.figsize'] = (4,3)
matplotlib.rcParams[u'text.usetex'] = False
Q = 3
#Q = 3
# Define dataset
N = 10
k1 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,10,10,0.1,0.1]), ARD=True)
k2 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,0.1,10,0.1,10]), ARD=True)
k3 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[0.1,0.1,10,10,10]), ARD=True)
X = np.random.normal(0, 1, (N, 5))
A = np.random.multivariate_normal(np.zeros(N), k1.K(X), Q).T
B = np.random.multivariate_normal(np.zeros(N), k2.K(X), Q).T
C = np.random.multivariate_normal(np.zeros(N), k3.K(X), Q).T
#N = 60
#k1 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,10,10,0.1,0.1]), ARD=True)
#k2 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,0.1,10,0.1,10]), ARD=True)
#k3 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[0.1,0.1,10,10,10]), ARD=True)
#X = np.random.normal(0, 1, (N, 5))
#A = np.random.multivariate_normal(np.zeros(N), k1.K(X), Q).T
#B = np.random.multivariate_normal(np.zeros(N), k2.K(X), Q).T
#C = np.random.multivariate_normal(np.zeros(N), k3.K(X), Q).T
#Y = np.vstack((A,B,C))
#labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0])*2))
Y = np.vstack((A,B,C))
labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0])*2))
#k = RBF(Q, ARD=True, lengthscale=2) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
pars = np.load(os.path.join(basedir, 'b-gplvm-save.npz'))
Y = pars['Y']
Q = pars['Q']
labels = pars['labels']
import warnings
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always') # always print
m = GPLVM(Y, Q, initialize=False)
m.update_model(False)
m.initialize_parameter()
m[:] = pars['gplvm_p']
m.update_model(True)
k = RBF(Q, ARD=True, lengthscale=2) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = GPLVM(Y, Q, init="PCA", kernel=k)
m.kern.lengthscale[:] = [1./.3, 1./.1, 1./.7]
m.likelihood.variance = .001
#m.optimize(messages=0)
np.random.seed(111)
m.plot_latent(labels=labels)
@ -436,31 +447,40 @@ def test_gplvm():
yield (do_test, )
def test_bayesian_gplvm():
from ..examples.dimensionality_reduction import _simulate_matern
from ..kern import RBF
from ..models import BayesianGPLVM
np.random.seed(12345)
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
#matplotlib.rcParams[u'figure.figsize'] = (4,3)
matplotlib.rcParams[u'text.usetex'] = False
Q = 3
#Q = 3
# Define dataset
N = 10
k1 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,10,10,0.1,0.1]), ARD=True)
k2 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,0.1,10,0.1,10]), ARD=True)
k3 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[0.1,0.1,10,10,10]), ARD=True)
X = np.random.normal(0, 1, (N, 5))
A = np.random.multivariate_normal(np.zeros(N), k1.K(X), Q).T
B = np.random.multivariate_normal(np.zeros(N), k2.K(X), Q).T
C = np.random.multivariate_normal(np.zeros(N), k3.K(X), Q).T
#N = 10
#k1 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,10,10,0.1,0.1]), ARD=True)
#k2 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,0.1,10,0.1,10]), ARD=True)
#k3 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[0.1,0.1,10,10,10]), ARD=True)
#X = np.random.normal(0, 1, (N, 5))
#A = np.random.multivariate_normal(np.zeros(N), k1.K(X), Q).T
#B = np.random.multivariate_normal(np.zeros(N), k2.K(X), Q).T
#C = np.random.multivariate_normal(np.zeros(N), k3.K(X), Q).T
Y = np.vstack((A,B,C))
labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0])*2))
#Y = np.vstack((A,B,C))
#labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0])*2))
#k = RBF(Q, ARD=True, lengthscale=2) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
pars = np.load(os.path.join(basedir, 'b-gplvm-save.npz'))
Y = pars['Y']
Q = pars['Q']
labels = pars['labels']
import warnings
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always') # always print
m = BayesianGPLVM(Y, Q, initialize=False)
m.update_model(False)
m.initialize_parameter()
m[:] = pars['bgplvm_p']
m.update_model(True)
k = RBF(Q, ARD=True, lengthscale=2) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", kernel=k)
m.kern.lengthscale[:] = [1./.3, 1./.1, 1./.7]
m.likelihood.variance = .001
#m.optimize(messages=0)
np.random.seed(111)
m.plot_inducing(projection='2d')

View file

@ -98,7 +98,7 @@ def data_available(dataset_name=None):
try:
from itertools import zip_longest
except ImportError:
from itertools import zip_longest as zip_longest
from itertools import izip_longest as zip_longest
dr = data_resources[dataset_name]
zip_urls = (dr['files'], )
if 'save_names' in dr: zip_urls += (dr['save_names'], )
@ -1033,14 +1033,18 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
data = inner.RPKM.to_frame()
data.columns = [file_info.name[:-18]]
gene_info = inner.Refseq_IDs.to_frame()
gene_info.columns = [file_info.name[:-18]]
gene_info.columns = ['NCBI Reference Sequence']
else:
data[file_info.name[:-18]] = inner.RPKM
gene_info[file_info.name[:-18]] = inner.Refseq_IDs
#gene_info[file_info.name[:-18]] = inner.Refseq_IDs
# Strip GSM number off data index
rep = re.compile('GSM\d+_')
data.columns = data.columns.to_series().apply(lambda row: row[rep.match(row).end():])
from pandas import MultiIndex
columns = MultiIndex.from_tuples([row.split('_', 1) for row in data.columns])
columns.names = ['GEO Accession', 'index']
data.columns = columns
data = data.T
# make sure the same index gets used