This commit is contained in:
mzwiessele 2016-08-03 12:54:05 +01:00
commit 9319a1dfc6
53 changed files with 126 additions and 47 deletions

View file

@ -29,6 +29,7 @@ install:
- echo $PATH - echo $PATH
- source install_retry.sh - source install_retry.sh
- pip install codecov - pip install codecov
- pip install coveralls
- pip install pypandoc - pip install pypandoc
- pip install git+git://github.com/BRML/climin.git - pip install git+git://github.com/BRML/climin.git
- pip install autograd - pip install autograd
@ -40,6 +41,7 @@ script:
after_success: after_success:
- codecov - codecov
- coveralls
before_deploy: before_deploy:
- cd doc - cd doc

View file

@ -211,6 +211,12 @@ class Kern(Parameterized):
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
""" """
Returns the sensitivity for each dimension of this kernel. Returns the sensitivity for each dimension of this kernel.
This is an arbitrary measurement based on the parameters
of the kernel per dimension and scaling in general.
Use this as relative measurement, not for absolute comparison between
kernels.
""" """
return np.zeros(self.input_dim) return np.zeros(self.input_dim)

View file

@ -99,7 +99,7 @@ class Prod(CombinationKernel):
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
if summarize: if summarize:
i_s = np.zeros((self.input_dim)) i_s = np.ones((self.input_dim))
for k in self.parts: for k in self.parts:
i_s[k._all_dims_active] *= k.input_sensitivity(summarize) i_s[k._all_dims_active] *= k.input_sensitivity(summarize)
return i_s return i_s

View file

@ -51,6 +51,10 @@ class Stationary(Kern):
The lengthscale(s) and variance parameters are added to the structure automatically. The lengthscale(s) and variance parameters are added to the structure automatically.
Thanks to @strongh:
In Stationary, a covariance function is defined in GPy as stationary when it depends only on the l2-norm |x_1 - x_2 |.
However this is the typical definition of isotropy, while stationarity is usually a bit more relaxed.
The more common version of stationarity is that the covariance is a function of x_1 - x_2 (See e.g. R&W first paragraph of section 4.1).
""" """
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False): def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):

View file

@ -23,9 +23,10 @@ class Additive(Mapping):
assert(mapping1.input_dim==mapping2.input_dim) assert(mapping1.input_dim==mapping2.input_dim)
assert(mapping1.output_dim==mapping2.output_dim) assert(mapping1.output_dim==mapping2.output_dim)
input_dim, output_dim = mapping1.input_dim, mapping1.output_dim input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim) super(Additive, self).__init__(input_dim=input_dim, output_dim=output_dim)
self.mapping1 = mapping1 self.mapping1 = mapping1
self.mapping2 = mapping2 self.mapping2 = mapping2
self.link_parameters(self.mapping1, self.mapping2)
def f(self, X): def f(self, X):
return self.mapping1.f(X) + self.mapping2.f(X) return self.mapping1.f(X) + self.mapping2.f(X)

View file

@ -33,7 +33,7 @@ class Linear(Mapping):
return np.dot(X, self.A) return np.dot(X, self.A)
def update_gradients(self, dL_dF, X): def update_gradients(self, dL_dF, X):
self.A.gradient = np.dot( X.T, dL_dF) self.A.gradient = np.dot(X.T, dL_dF)
def gradients_X(self, dL_dF, X): def gradients_X(self, dL_dF, X):
return np.dot(dL_dF, self.A.T) return np.dot(dL_dF, self.A.T)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -28,10 +28,49 @@ class MFtests(unittest.TestCase):
A linear mean function with parameters that we'll learn alongside the kernel A linear mean function with parameters that we'll learn alongside the kernel
""" """
X = np.linspace(-1,10,50).reshape(-1,1)
Y = 3-np.abs((X-6))
Y += .5*np.cos(3*X) + 0.3*np.random.randn(*X.shape)
mf = GPy.mappings.PiecewiseLinear(1, 1, [-1,1], [9,2])
k =GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
self.assertTrue(m.checkgrad())
def test_parametric_mean_function_composition(self):
"""
A linear mean function with parameters that we'll learn alongside the kernel
"""
X = np.linspace(0,10,50).reshape(-1,1) X = np.linspace(0,10,50).reshape(-1,1)
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
mf = GPy.mappings.Linear(1,1) mf = GPy.mappings.Compound(GPy.mappings.Linear(1,1),
GPy.mappings.Kernel(1, 1, np.random.normal(0,1,(1,1)),
GPy.kern.RBF(1))
)
k =GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
self.assertTrue(m.checkgrad())
def test_parametric_mean_function_additive(self):
"""
A linear mean function with parameters that we'll learn alongside the kernel
"""
X = np.linspace(0,10,50).reshape(-1,1)
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
mf = GPy.mappings.Additive(GPy.mappings.Constant(1,1,3),
GPy.mappings.Additive(GPy.mappings.MLP(1,1),
GPy.mappings.Identity(1,1)
)
)
k =GPy.kern.RBF(1) k =GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian() lik = GPy.likelihoods.Gaussian()

View file

@ -74,12 +74,13 @@ except ImportError:
extensions = ['npz'] extensions = ['npz']
basedir = os.path.dirname(os.path.relpath(os.path.abspath(__file__)))
def _image_directories(): def _image_directories():
""" """
Compute the baseline and result image directories for testing *func*. Compute the baseline and result image directories for testing *func*.
Create the result directory if it doesn't exist. Create the result directory if it doesn't exist.
""" """
basedir = os.path.dirname(os.path.relpath(os.path.abspath(__file__)))
#module_name = __init__.__module__ #module_name = __init__.__module__
#mods = module_name.split('.') #mods = module_name.split('.')
#basedir = os.path.join(*mods) #basedir = os.path.join(*mods)
@ -349,7 +350,9 @@ def test_sparse():
m = GPy.models.SparseGPRegression(X, Y, X_variance=np.ones_like(X)*0.1) m = GPy.models.SparseGPRegression(X, Y, X_variance=np.ones_like(X)*0.1)
#m.optimize() #m.optimize()
#m.plot_inducing() #m.plot_inducing()
m.plot_data() _, ax = plt.subplots()
m.plot_data(ax=ax)
m.plot_data_error(ax=ax)
for do_test in _image_comparison(baseline_images=['sparse_gp_{}'.format(sub) for sub in ['data_error']], extensions=extensions): for do_test in _image_comparison(baseline_images=['sparse_gp_{}'.format(sub) for sub in ['data_error']], extensions=extensions):
yield (do_test, ) yield (do_test, )
@ -397,31 +400,39 @@ def test_sparse_classification():
yield (do_test, ) yield (do_test, )
def test_gplvm(): def test_gplvm():
from ..examples.dimensionality_reduction import _simulate_matern from GPy.models import GPLVM
from ..kern import RBF
from ..models import GPLVM
np.random.seed(12345) np.random.seed(12345)
matplotlib.rcParams.update(matplotlib.rcParamsDefault) matplotlib.rcParams.update(matplotlib.rcParamsDefault)
#matplotlib.rcParams[u'figure.figsize'] = (4,3) #matplotlib.rcParams[u'figure.figsize'] = (4,3)
matplotlib.rcParams[u'text.usetex'] = False matplotlib.rcParams[u'text.usetex'] = False
Q = 3 #Q = 3
# Define dataset # Define dataset
N = 10 #N = 60
k1 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,10,10,0.1,0.1]), ARD=True) #k1 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,10,10,0.1,0.1]), ARD=True)
k2 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,0.1,10,0.1,10]), ARD=True) #k2 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,0.1,10,0.1,10]), ARD=True)
k3 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[0.1,0.1,10,10,10]), ARD=True) #k3 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[0.1,0.1,10,10,10]), ARD=True)
X = np.random.normal(0, 1, (N, 5)) #X = np.random.normal(0, 1, (N, 5))
A = np.random.multivariate_normal(np.zeros(N), k1.K(X), Q).T #A = np.random.multivariate_normal(np.zeros(N), k1.K(X), Q).T
B = np.random.multivariate_normal(np.zeros(N), k2.K(X), Q).T #B = np.random.multivariate_normal(np.zeros(N), k2.K(X), Q).T
C = np.random.multivariate_normal(np.zeros(N), k3.K(X), Q).T #C = np.random.multivariate_normal(np.zeros(N), k3.K(X), Q).T
#Y = np.vstack((A,B,C))
#labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0])*2))
Y = np.vstack((A,B,C)) #k = RBF(Q, ARD=True, lengthscale=2) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0])*2)) pars = np.load(os.path.join(basedir, 'b-gplvm-save.npz'))
Y = pars['Y']
Q = pars['Q']
labels = pars['labels']
import warnings
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always') # always print
m = GPLVM(Y, Q, initialize=False)
m.update_model(False)
m.initialize_parameter()
m[:] = pars['gplvm_p']
m.update_model(True)
k = RBF(Q, ARD=True, lengthscale=2) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = GPLVM(Y, Q, init="PCA", kernel=k)
m.kern.lengthscale[:] = [1./.3, 1./.1, 1./.7]
m.likelihood.variance = .001
#m.optimize(messages=0) #m.optimize(messages=0)
np.random.seed(111) np.random.seed(111)
m.plot_latent(labels=labels) m.plot_latent(labels=labels)
@ -436,31 +447,40 @@ def test_gplvm():
yield (do_test, ) yield (do_test, )
def test_bayesian_gplvm(): def test_bayesian_gplvm():
from ..examples.dimensionality_reduction import _simulate_matern
from ..kern import RBF
from ..models import BayesianGPLVM from ..models import BayesianGPLVM
np.random.seed(12345) np.random.seed(12345)
matplotlib.rcParams.update(matplotlib.rcParamsDefault) matplotlib.rcParams.update(matplotlib.rcParamsDefault)
#matplotlib.rcParams[u'figure.figsize'] = (4,3) #matplotlib.rcParams[u'figure.figsize'] = (4,3)
matplotlib.rcParams[u'text.usetex'] = False matplotlib.rcParams[u'text.usetex'] = False
Q = 3 #Q = 3
# Define dataset # Define dataset
N = 10 #N = 10
k1 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,10,10,0.1,0.1]), ARD=True) #k1 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,10,10,0.1,0.1]), ARD=True)
k2 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,0.1,10,0.1,10]), ARD=True) #k2 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[10,0.1,10,0.1,10]), ARD=True)
k3 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[0.1,0.1,10,10,10]), ARD=True) #k3 = GPy.kern.RBF(5, variance=1, lengthscale=1./np.random.dirichlet(np.r_[0.1,0.1,10,10,10]), ARD=True)
X = np.random.normal(0, 1, (N, 5)) #X = np.random.normal(0, 1, (N, 5))
A = np.random.multivariate_normal(np.zeros(N), k1.K(X), Q).T #A = np.random.multivariate_normal(np.zeros(N), k1.K(X), Q).T
B = np.random.multivariate_normal(np.zeros(N), k2.K(X), Q).T #B = np.random.multivariate_normal(np.zeros(N), k2.K(X), Q).T
C = np.random.multivariate_normal(np.zeros(N), k3.K(X), Q).T #C = np.random.multivariate_normal(np.zeros(N), k3.K(X), Q).T
Y = np.vstack((A,B,C)) #Y = np.vstack((A,B,C))
labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0])*2)) #labels = np.hstack((np.zeros(A.shape[0]), np.ones(B.shape[0]), np.ones(C.shape[0])*2))
#k = RBF(Q, ARD=True, lengthscale=2) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
pars = np.load(os.path.join(basedir, 'b-gplvm-save.npz'))
Y = pars['Y']
Q = pars['Q']
labels = pars['labels']
import warnings
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always') # always print
m = BayesianGPLVM(Y, Q, initialize=False)
m.update_model(False)
m.initialize_parameter()
m[:] = pars['bgplvm_p']
m.update_model(True)
k = RBF(Q, ARD=True, lengthscale=2) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
m = BayesianGPLVM(Y, Q, init="PCA", kernel=k)
m.kern.lengthscale[:] = [1./.3, 1./.1, 1./.7]
m.likelihood.variance = .001
#m.optimize(messages=0) #m.optimize(messages=0)
np.random.seed(111) np.random.seed(111)
m.plot_inducing(projection='2d') m.plot_inducing(projection='2d')

View file

@ -98,7 +98,7 @@ def data_available(dataset_name=None):
try: try:
from itertools import zip_longest from itertools import zip_longest
except ImportError: except ImportError:
from itertools import zip_longest as zip_longest from itertools import izip_longest as zip_longest
dr = data_resources[dataset_name] dr = data_resources[dataset_name]
zip_urls = (dr['files'], ) zip_urls = (dr['files'], )
if 'save_names' in dr: zip_urls += (dr['save_names'], ) if 'save_names' in dr: zip_urls += (dr['save_names'], )
@ -1033,14 +1033,18 @@ def singlecell_rna_seq_deng(dataset='singlecell_deng'):
data = inner.RPKM.to_frame() data = inner.RPKM.to_frame()
data.columns = [file_info.name[:-18]] data.columns = [file_info.name[:-18]]
gene_info = inner.Refseq_IDs.to_frame() gene_info = inner.Refseq_IDs.to_frame()
gene_info.columns = [file_info.name[:-18]] gene_info.columns = ['NCBI Reference Sequence']
else: else:
data[file_info.name[:-18]] = inner.RPKM data[file_info.name[:-18]] = inner.RPKM
gene_info[file_info.name[:-18]] = inner.Refseq_IDs #gene_info[file_info.name[:-18]] = inner.Refseq_IDs
# Strip GSM number off data index # Strip GSM number off data index
rep = re.compile('GSM\d+_') rep = re.compile('GSM\d+_')
data.columns = data.columns.to_series().apply(lambda row: row[rep.match(row).end():])
from pandas import MultiIndex
columns = MultiIndex.from_tuples([row.split('_', 1) for row in data.columns])
columns.names = ['GEO Accession', 'index']
data.columns = columns
data = data.T data = data.T
# make sure the same index gets used # make sure the same index gets used

View file

@ -9,7 +9,7 @@ The Gaussian processes framework in Python.
* Travis-CI [unit-tests](https://travis-ci.org/SheffieldML/GPy) * Travis-CI [unit-tests](https://travis-ci.org/SheffieldML/GPy)
* [![licence](https://img.shields.io/badge/licence-BSD-blue.svg)](http://opensource.org/licenses/BSD-3-Clause) * [![licence](https://img.shields.io/badge/licence-BSD-blue.svg)](http://opensource.org/licenses/BSD-3-Clause)
[![develstat](https://travis-ci.org/SheffieldML/GPy.svg?branch=devel)](https://travis-ci.org/SheffieldML/GPy) [![appveyor](https://ci.appveyor.com/api/projects/status/662o6tha09m2jix3/branch/deploy?svg=true)](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [![covdevel](http://codecov.io/github/SheffieldML/GPy/coverage.svg?branch=devel)](http://codecov.io/github/SheffieldML/GPy?branch=devel) [![Research software impact](http://depsy.org/api/package/pypi/GPy/badge.svg)](http://depsy.org/package/python/GPy) [![Code Health](https://landscape.io/github/SheffieldML/GPy/devel/landscape.svg?style=flat)](https://landscape.io/github/SheffieldML/GPy/devel) [![develstat](https://travis-ci.org/SheffieldML/GPy.svg?branch=devel)](https://travis-ci.org/SheffieldML/GPy) [![appveyor](https://ci.appveyor.com/api/projects/status/662o6tha09m2jix3/branch/deploy?svg=true)](https://ci.appveyor.com/project/mzwiessele/gpy/branch/deploy) [![coverallsdevel](https://coveralls.io/repos/github/SheffieldML/GPy/badge.svg?branch=devel)](https://coveralls.io/github/SheffieldML/GPy?branch=devel) [![covdevel](http://codecov.io/github/SheffieldML/GPy/coverage.svg?branch=devel)](http://codecov.io/github/SheffieldML/GPy?branch=devel) [![Research software impact](http://depsy.org/api/package/pypi/GPy/badge.svg)](http://depsy.org/package/python/GPy) [![Code Health](https://landscape.io/github/SheffieldML/GPy/devel/landscape.svg?style=flat)](https://landscape.io/github/SheffieldML/GPy/devel)
## Updated Structure ## Updated Structure

View file

@ -1,6 +1,8 @@
environment: environment:
pip_access: pip_access:
secure: 8/ZjXFwtd1S7ixd7PJOpptupKKEDhm2da/q3unabJ00= secure: 8/ZjXFwtd1S7ixd7PJOpptupKKEDhm2da/q3unabJ00=
COVERALLS_REPO_TOKEN:
secure: d3Luic/ESkGaWnZrvWZTKrzO+xaVwJWaRCEP0F+K/9DQGPSRZsJ/Du5g3s4XF+tS
gpy_version: 1.2.1 gpy_version: 1.2.1
matrix: matrix:
- PYTHON_VERSION: 2.7 - PYTHON_VERSION: 2.7
@ -25,6 +27,7 @@ install:
- python -m pip install paramz - python -m pip install paramz
- python -m pip install nose-show-skipped - python -m pip install nose-show-skipped
- python -m pip install coverage - python -m pip install coverage
- python -m pip install coveralls
- python -m pip install codecov - python -m pip install codecov
- python -m pip install twine - python -m pip install twine
- "python setup.py develop" - "python setup.py develop"

View file

@ -36,5 +36,5 @@ matplotlib.use('agg')
import nose, warnings import nose, warnings
with warnings.catch_warnings(): with warnings.catch_warnings():
warnings.simplefilter("ignore") warnings.simplefilter("ignore")
nose.main('GPy', defaultTest='GPy/testing/', argv=['', '--show-skipped']) nose.main('GPy', defaultTest='GPy/testing', argv=['', '--show-skipped'])