Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

Neil Lawrence 2015-05-08 11:44:26 +01:00
commit c05540dc31
170 changed files with 30768 additions and 2183 deletions

View file

@@ -18,7 +18,8 @@ before_install:
 install:
   - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
-  - pip install .
+  #- pip install .
+  - python setup.py build_ext --inplace
 #--use-mirrors
 #
 # command to run tests, e.g. python setup.py test

View file

@@ -3,23 +3,23 @@
 import warnings
 warnings.filterwarnings("ignore", category=DeprecationWarning)
-import core
+from . import core
-from core.parameterization import transformations, priors
+from .core.parameterization import transformations, priors
 constraints = transformations
-import models
+from . import models
-import mappings
+from . import mappings
-import inference
+from . import inference
-import util
+from . import util
-import examples
+from . import examples
-import likelihoods
+from . import likelihoods
-import testing
+from . import testing
 from numpy.testing import Tester
-import kern
+from . import kern
-import plotting
+from . import plotting
 # Direct imports for convenience:
-from core import Model
+from .core import Model
-from core.parameterization import Param, Parameterized, ObsAr
+from .core.parameterization import Param, Parameterized, ObsAr
 #@nottest
 try:

View file

@@ -1,12 +1,12 @@
 # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from model import *
+from .model import *
-from parameterization.parameterized import adjust_name_for_printing, Parameterizable
+from .parameterization.parameterized import adjust_name_for_printing, Parameterizable
-from parameterization.param import Param, ParamConcatenation
+from .parameterization.param import Param, ParamConcatenation
-from parameterization.observable_array import ObsAr
+from .parameterization.observable_array import ObsAr
-from gp import GP
+from .gp import GP
-from svgp import SVGP
+from .svgp import SVGP
-from sparse_gp import SparseGP
+from .sparse_gp import SparseGP
-from mapping import *
+from .mapping import *

View file

@@ -4,13 +4,15 @@
 import numpy as np
 import sys
 from .. import kern
-from model import Model
+from .model import Model
-from parameterization import ObsAr
+from .parameterization import ObsAr
+from .mapping import Mapping
 from .. import likelihoods
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
-from parameterization.variational import VariationalPosterior
+from .parameterization.variational import VariationalPosterior
 import logging
+import warnings
 from GPy.util.normalizer import MeanNorm
 logger = logging.getLogger("GP")
@@ -34,7 +36,7 @@ class GP(Model):
     """
-    def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
+    def __init__(self, X, Y, kernel, likelihood, mean_function=None, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
         super(GP, self).__init__(name)
         assert X.ndim == 2
@@ -62,10 +64,14 @@ class GP(Model):
         self.Y = ObsAr(Y)
         self.Y_normalized = self.Y
-        assert Y.shape[0] == self.num_data
+        if Y.shape[0] != self.num_data:
+            #There can be cases where we want inputs than outputs, for example if we have multiple latent
+            #function values
+            warnings.warn("There are more rows in your input data X, \
+                than in your output data Y, be VERY sure this is what you want")
         _, self.output_dim = self.Y.shape
-        #TODO: check the type of this is okay?
+        assert ((Y_metadata is None) or isinstance(Y_metadata, dict))
         self.Y_metadata = Y_metadata
         assert isinstance(kernel, kern.Kern)
@@ -75,6 +81,15 @@ class GP(Model):
         assert isinstance(likelihood, likelihoods.Likelihood)
         self.likelihood = likelihood
+        #handle the mean function
+        self.mean_function = mean_function
+        if mean_function is not None:
+            assert isinstance(self.mean_function, Mapping)
+            assert mean_function.input_dim == self.input_dim
+            assert mean_function.output_dim == self.output_dim
+            self.link_parameter(mean_function)
         #find a sensible inference method
         logger.info("initializing inference method")
         if inference_method is None:
@@ -82,14 +97,16 @@ class GP(Model):
                 inference_method = exact_gaussian_inference.ExactGaussianInference()
             else:
                 inference_method = expectation_propagation.EP()
-            print "defaulting to ", inference_method, "for latent function inference"
+            print("defaulting to ", inference_method, "for latent function inference")
         self.inference_method = inference_method
         logger.info("adding kernel and likelihood as parameters")
         self.link_parameter(self.kern)
         self.link_parameter(self.likelihood)
+        self.posterior = None
-    def set_XY(self, X=None, Y=None):
+    def set_XY(self, X=None, Y=None, trigger_update=True):
         """
         Set the input / output data of the model
         This is useful if we wish to change our existing data but maintain the same model
@@ -99,7 +116,7 @@ class GP(Model):
         :param Y: output observations
         :type Y: np.ndarray
         """
-        self.update_model(False)
+        if trigger_update: self.update_model(False)
         if Y is not None:
             if self.normalizer is not None:
                 self.normalizer.scale_by(Y)
@@ -123,26 +140,26 @@ class GP(Model):
                 self.link_parameters(self.X)
             else:
                 self.X = ObsAr(X)
-        self.update_model(True)
+        if trigger_update: self.update_model(True)
-        self._trigger_params_changed()
+        if trigger_update: self._trigger_params_changed()
-    def set_X(self,X):
+    def set_X(self,X, trigger_update=True):
         """
         Set the input data of the model
         :param X: input observations
         :type X: np.ndarray
         """
-        self.set_XY(X=X)
+        self.set_XY(X=X, trigger_update=trigger_update)
-    def set_Y(self,Y):
+    def set_Y(self,Y, trigger_update=True):
         """
         Set the output data of the model
         :param X: output observations
         :type X: np.ndarray
         """
-        self.set_XY(Y=Y)
+        self.set_XY(Y=Y, trigger_update=trigger_update)
     def parameters_changed(self):
         """
@@ -153,9 +170,11 @@ class GP(Model):
         This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
         this method yourself, there may be unexpected consequences.
         """
-        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
+        self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.mean_function, self.Y_metadata)
         self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
         self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
+        if self.mean_function is not None:
+            self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)
     def log_likelihood(self):
         """
@@ -192,6 +211,10 @@ class GP(Model):
         #force mu to be a column vector
         if len(mu.shape)==1: mu = mu[:,None]
+        #add the mean function in
+        if not self.mean_function is None:
+            mu += self.mean_function.f(_Xnew)
         return mu, var
     def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@@ -241,12 +264,14 @@ class GP(Model):
     def predictive_gradients(self, Xnew):
         """
-        Compute the derivatives of the latent function with respect to X*
+        Compute the derivatives of the predicted latent function with respect to X*
         Given a set of points at which to predict X* (size [N*,Q]), compute the
         derivatives of the mean and variance. Resulting arrays are sized:
         dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
+        Note that this is not the same as computing the mean and variance of the derivative of the function!
         dv_dX* -- [N*, Q], (since all outputs have the same variance)
         :param X: The points at which to get the predictive gradients
         :type X: np.ndarray (Xnew x self.input_dim)
@@ -276,7 +301,7 @@ class GP(Model):
         :type size: int.
         :param full_cov: whether to return the full covariance matrix, or just the diagonal.
         :type full_cov: bool.
-        :returns: Ysim: set of simulations
+        :returns: fsim: set of simulations
         :rtype: np.ndarray (N x samples)
         """
         m, v = self._raw_predict(X, full_cov=full_cov)
@@ -284,11 +309,11 @@ class GP(Model):
             m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
         v = v.reshape(m.size,-1) if len(v.shape)==3 else v
         if not full_cov:
-            Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
+            fsim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
         else:
-            Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
+            fsim = np.random.multivariate_normal(m.flatten(), v, size).T
-        return Ysim
+        return fsim
     def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
         """
@@ -304,16 +329,16 @@ class GP(Model):
         :type noise_model: integer.
         :returns: Ysim: set of simulations, a Numpy array (N x samples).
         """
-        Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
+        fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
-        Ysim = self.likelihood.samples(Ysim, Y_metadata)
+        Ysim = self.likelihood.samples(fsim, Y_metadata)
         return Ysim
     def plot_f(self, plot_limits=None, which_data_rows='all',
             which_data_ycols='all', fixed_inputs=[],
             levels=20, samples=0, fignum=None, ax=None, resolution=None,
             plot_raw=True,
-            linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
+            linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx',
+            apply_link=False):
         """
         Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
         This is a call to plot with plot_raw=True.
@@ -350,6 +375,8 @@ class GP(Model):
         :type Y_metadata: dict
         :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
         :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
+        :param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*
+        :type apply_link: boolean
         """
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ..plotting.matplot_dep import models_plots
@@ -362,13 +389,13 @@ class GP(Model):
                 which_data_ycols, fixed_inputs,
                 levels, samples, fignum, ax, resolution,
                 plot_raw=plot_raw, Y_metadata=Y_metadata,
-                data_symbol=data_symbol, **kw)
+                data_symbol=data_symbol, apply_link=apply_link, **kw)
     def plot(self, plot_limits=None, which_data_rows='all',
             which_data_ycols='all', fixed_inputs=[],
             levels=20, samples=0, fignum=None, ax=None, resolution=None,
             plot_raw=False,
-            linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
+            linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None):
         """
         Plot the posterior of the GP.
         - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@@ -417,7 +444,7 @@ class GP(Model):
                 which_data_ycols, fixed_inputs,
                 levels, samples, fignum, ax, resolution,
                 plot_raw=plot_raw, Y_metadata=Y_metadata,
-                data_symbol=data_symbol, **kw)
+                data_symbol=data_symbol, predict_kw=predict_kw, **kw)
     def input_sensitivity(self, summarize=True):
         """
@@ -441,7 +468,7 @@ class GP(Model):
         try:
             super(GP, self).optimize(optimizer, start, **kwargs)
         except KeyboardInterrupt:
-            print "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
+            print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
             self.inference_method.on_optimization_end()
             raise
@@ -458,3 +485,38 @@ class GP(Model):
         """
         from ..inference.latent_function_inference.inferenceX import infer_newX
         return infer_newX(self, Y_new, optimize=optimize)
+    def log_predictive_density(self, x_test, y_test, Y_metadata=None):
+        """
+        Calculation of the log predictive density
+        .. math:
+            p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
+        :param x_test: test locations (x_{*})
+        :type x_test: (Nx1) array
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param Y_metadata: metadata associated with the test points
+        """
+        mu_star, var_star = self._raw_predict(x_test)
+        return self.likelihood.log_predictive_density(y_test, mu_star, var_star, Y_metadata=Y_metadata)
+    def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_samples=1000):
+        """
+        Calculation of the log predictive density by sampling
+        .. math:
+            p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
+        :param x_test: test locations (x_{*})
+        :type x_test: (Nx1) array
+        :param y_test: test observations (y_{*})
+        :type y_test: (Nx1) array
+        :param Y_metadata: metadata associated with the test points
+        :param num_samples: number of samples to use in monte carlo integration
+        :type num_samples: int
+        """
+        mu_star, var_star = self._raw_predict(x_test)
+        return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
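
Editor's note: the hunks above add an optional mean function to the GP base class and new log-predictive-density methods. A minimal sketch of how the new arguments could be used is below; the toy data, and the choice of GPy.kern.RBF, GPy.likelihoods.Gaussian and GPy.mappings.Linear as the mean function, are assumptions for illustration and not part of this commit.

```python
import numpy as np
import GPy

# Toy data (illustrative only): 1-D inputs with a linear trend plus a sinusoid.
X = np.random.uniform(-3., 3., (50, 1))
Y = np.sin(X) + 0.5 * X + 0.05 * np.random.randn(50, 1)

# A GP whose prior mean is a parameterised linear mapping rather than zero.
mean_function = GPy.mappings.Linear(input_dim=1, output_dim=1)
model = GPy.core.GP(X, Y,
                    kernel=GPy.kern.RBF(1),
                    likelihood=GPy.likelihoods.Gaussian(),
                    mean_function=mean_function)
model.optimize()

# The new method scores held-out observations under the posterior.
X_test = np.linspace(-3., 3., 10)[:, None]
Y_test = np.sin(X_test) + 0.5 * X_test
print(model.log_predictive_density(X_test, Y_test))
```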

View file

@@ -1,13 +1,14 @@
 # Copyright (c) 2013,2014, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2015, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import sys
-from parameterization import Parameterized
+from .parameterization import Parameterized
 import numpy as np
 class Mapping(Parameterized):
     """
-    Base model for shared behavior between models that can act like a mapping.
+    Base model for shared mapping behaviours
     """
     def __init__(self, input_dim, output_dim, name='mapping'):
@@ -18,49 +19,12 @@ class Mapping(Parameterized):
     def f(self, X):
         raise NotImplementedError
-    def df_dX(self, dL_df, X):
+    def gradients_X(self, dL_dF, X):
-        """Evaluate derivatives of mapping outputs with respect to inputs.
-        :param dL_df: gradient of the objective with respect to the function.
-        :type dL_df: ndarray (num_data x output_dim)
-        :param X: the input locations where derivatives are to be evaluated.
-        :type X: ndarray (num_data x input_dim)
-        :returns: matrix containing gradients of the function with respect to the inputs.
-        """
         raise NotImplementedError
-    def df_dtheta(self, dL_df, X):
+    def update_gradients(self, dL_dF, X):
-        """The gradient of the outputs of the mapping with respect to each of the parameters.
-        :param dL_df: gradient of the objective with respect to the function.
-        :type dL_df: ndarray (num_data x output_dim)
-        :param X: input locations where the function is evaluated.
-        :type X: ndarray (num_data x input_dim)
-        :returns: Matrix containing gradients with respect to parameters of each output for each input data.
-        :rtype: ndarray (num_params length)
-        """
         raise NotImplementedError
-    def plot(self, *args):
-        """
-        Plots the mapping associated with the model.
-          - In one dimension, the function is plotted.
-          - In two dimensions, a contour-plot shows the function
-          - In higher dimensions, we've not implemented this yet !TODO!
-        Can plot only part of the data and part of the posterior functions
-        using which_data and which_functions
-        This is a convenience function: arguments are passed to
-        GPy.plotting.matplot_dep.models_plots.plot_mapping
-        """
-        if "matplotlib" in sys.modules:
-            from ..plotting.matplot_dep import models_plots
-            mapping_plots.plot_mapping(self,*args)
-        else:
-            raise NameError, "matplotlib package has not been imported."
 class Bijective_mapping(Mapping):
     """
@@ -74,72 +38,4 @@ class Bijective_mapping(Mapping):
         """Inverse mapping from output domain of the function to the inputs."""
         raise NotImplementedError
-from model import Model
-class Mapping_check_model(Model):
-    """
-    This is a dummy model class used as a base class for checking that the
-    gradients of a given mapping are implemented correctly. It enables
-    checkgradient() to be called independently on each mapping.
-    """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        num_samples = 20
-        if mapping==None:
-            mapping = GPy.mapping.linear(1, 1)
-        if X==None:
-            X = np.random.randn(num_samples, mapping.input_dim)
-        if dL_df==None:
-            dL_df = np.ones((num_samples, mapping.output_dim))
-        self.mapping=mapping
-        self.X = X
-        self.dL_df = dL_df
-        self.num_params = self.mapping.num_params
-        Model.__init__(self)
-    def _get_params(self):
-        return self.mapping._get_params()
-    def _get_param_names(self):
-        return self.mapping._get_param_names()
-    def _set_params(self, x):
-        self.mapping._set_params(x)
-    def log_likelihood(self):
-        return (self.dL_df*self.mapping.f(self.X)).sum()
-    def _log_likelihood_gradients(self):
-        raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class."
-class Mapping_check_df_dtheta(Mapping_check_model):
-    """This class allows gradient checks for the gradient of a mapping with respect to parameters. """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X)
-    def _log_likelihood_gradients(self):
-        return self.mapping.df_dtheta(self.dL_df, self.X)
-class Mapping_check_df_dX(Mapping_check_model):
-    """This class allows gradient checks for the gradient of a mapping with respect to X. """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X)
-        if dL_df==None:
-            dL_df = np.ones((self.X.shape[0],self.mapping.output_dim))
-        self.num_params = self.X.shape[0]*self.mapping.input_dim
-    def _log_likelihood_gradients(self):
-        return self.mapping.df_dX(self.dL_df, self.X).flatten()
-    def _get_param_names(self):
-        return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
-    def _get_params(self):
-        return self.X.flatten()
-    def _set_params(self, x):
-        self.X=x.reshape(self.X.shape)
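
Editor's note: the slimmed-down Mapping interface above keeps only f, gradients_X and update_gradients (the old df_dX/df_dtheta names and the gradient-check helper classes are removed). The GPy-free sketch below shows what those three methods compute for a plain linear map; ToyLinearMapping is a hypothetical class that only mirrors the method names from the diff, not a real GPy Mapping subclass.

```python
import numpy as np

class ToyLinearMapping(object):
    """Illustrative stand-in: F = X A, with A as the mapping's parameters."""
    def __init__(self, input_dim, output_dim):
        self.A = np.zeros((input_dim, output_dim))
        self.A_gradient = np.zeros_like(self.A)

    def f(self, X):
        # (N, input_dim) -> (N, output_dim)
        return X.dot(self.A)

    def update_gradients(self, dL_dF, X):
        # Accumulate dL/dA given the gradient of the objective w.r.t. F.
        self.A_gradient[:] = X.T.dot(dL_dF)

    def gradients_X(self, dL_dF, X):
        # Return dL/dX given the gradient of the objective w.r.t. F.
        return dL_dF.dot(self.A.T)

mapping = ToyLinearMapping(2, 1)
X = np.random.randn(5, 2)
dL_dF = np.ones((5, 1))
mapping.update_gradients(dL_dF, X)
print(mapping.gradients_X(dL_dF, X).shape)  # (5, 2)
```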

View file

@@ -5,7 +5,7 @@
 from .. import likelihoods
 from ..inference import optimization
 from ..util.misc import opt_wrapper
-from parameterization import Parameterized
+from .parameterization import Parameterized
 import multiprocessing as mp
 import numpy as np
 from numpy.linalg.linalg import LinAlgError
@@ -13,6 +13,7 @@ import itertools
 import sys
 from .verbose_optimization import VerboseOptimization
 # import numdifftools as ndt
+from functools import reduce
 class Model(Parameterized):
     _fail_count = 0  # Count of failed optimization steps (see objective)
@@ -30,7 +31,7 @@ class Model(Parameterized):
         self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)
     def log_likelihood(self):
-        raise NotImplementedError, "this needs to be implemented to use the model class"
+        raise NotImplementedError("this needs to be implemented to use the model class")
     def _log_likelihood_gradients(self):
         return self.gradient.copy()
@@ -82,7 +83,7 @@ class Model(Parameterized):
             pool.close()  # signal that no more data coming in
             pool.join()  # wait for all the tasks to complete
         except KeyboardInterrupt:
-            print "Ctrl+c received, terminating and joining pool."
+            print("Ctrl+c received, terminating and joining pool.")
             pool.terminate()
             pool.join()
@@ -95,10 +96,10 @@ class Model(Parameterized):
                     self.optimization_runs.append(jobs[i].get())
                 if verbose:
-                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
+                    print(("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt)))
             except Exception as e:
                 if robust:
-                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
+                    print(("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts)))
                 else:
                     raise e
@@ -119,7 +120,7 @@ class Model(Parameterized):
         DEPRECATED.
         """
-        raise DeprecationWarning, 'parameters now have default constraints'
+        raise DeprecationWarning('parameters now have default constraints')
     def objective_function(self):
         """
@@ -213,14 +214,14 @@ class Model(Parameterized):
         self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10)
         return obj_f, self.obj_grads
-    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, **kwargs):
+    def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
         """
         Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
        kwargs are passed to the optimizer. They can be:
-        :param max_f_eval: maximum number of function evaluations
+        :param max_iters: maximum number of function evaluations
-        :type max_f_eval: int
+        :type max_iters: int
         :messages: True: Display messages during optimisation, "ipython_notebook":
         :type messages: bool"string
         :param optimizer: which optimizer to use (defaults to self.preferred optimizer)
@@ -237,10 +238,10 @@ class Model(Parameterized):
         """
         if self.is_fixed or self.size == 0:
-            print 'nothing to optimize'
+            print('nothing to optimize')
         if not self.update_model():
-            print "updates were off, setting updates on again"
+            print("updates were off, setting updates on again")
             self.update_model(True)
         if start == None:
@@ -305,7 +306,7 @@ class Model(Parameterized):
             transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]
             if transformed_index.size == 0:
-                print "No free parameters to check"
+                print("No free parameters to check")
                 return
         # just check the global ratio
@@ -340,9 +341,9 @@ class Model(Parameterized):
         cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
         cols = np.array(cols) + 5
         header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
-        header_string = map(lambda x: '|'.join(x), [header_string])
+        header_string = list(map(lambda x: '|'.join(x), [header_string]))
         separator = '-' * len(header_string[0])
-        print '\n'.join([header_string[0], separator])
+        print('\n'.join([header_string[0], separator]))
         if target_param is None:
             param_index = range(len(x))
             transformed_index = param_index
@@ -358,19 +359,24 @@ class Model(Parameterized):
             transformed_index = param_index
             if param_index.size == 0:
-                print "No free parameters to check"
+                print("No free parameters to check")
                 return
         gradient = self._grads(x).copy()
         np.where(gradient == 0, 1e-312, gradient)
         ret = True
-        for nind, xind in itertools.izip(param_index, transformed_index):
+        for nind, xind in zip(param_index, transformed_index):
             xx = x.copy()
             xx[xind] += step
             f1 = self._objective(xx)
             xx[xind] -= 2.*step
             f2 = self._objective(xx)
+            #Avoid divide by zero, if any of the values are above 1e-15, otherwise both values are essentiall
+            #the same
+            if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
-            df_ratio = np.abs((f1 - f2) / min(f1, f2))
+                df_ratio = np.abs((f1 - f2) / min(f1, f2))
+            else:
+                df_ratio = 1.0
             df_unstable = df_ratio < df_tolerance
             numerical_gradient = (f1 - f2) / (2 * step)
             if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
@@ -392,7 +398,7 @@ class Model(Parameterized):
             ng = '%.6f' % float(numerical_gradient)
             df = '%1.e' % float(df_ratio)
             grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}|{5:^{c5}}".format(formatted_name, r, d, g, ng, df, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4], c5=cols[5])
-            print grad_string
+            print(grad_string)
         self.optimizer_array = x
         return ret
@@ -402,6 +408,7 @@ class Model(Parameterized):
         model_details = [['<b>Model</b>', self.name + '<br>'],
                          ['<b>Log-likelihood</b>', '{}<br>'.format(float(self.log_likelihood()))],
                          ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)],
+                         ["<b>Number of Optimization Parameters</b>", '{}<br>'.format(self._size_transformed())],
                          ["<b>Updates</b>", '{}<br>'.format(self._update_on)],
                          ]
         from operator import itemgetter
@@ -419,6 +426,7 @@ class Model(Parameterized):
         model_details = [['Name', self.name],
                          ['Log-likelihood', '{}'.format(float(self.log_likelihood()))],
                          ["Number of Parameters", '{}'.format(self.size)],
+                         ["Number of Optimization Parameters", '{}'.format(self._size_transformed())],
                          ["Updates", '{}'.format(self._update_on)],
                          ]
         from operator import itemgetter
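
Editor's note: one behavioural change in the hunks above is the guard in Model.checkgrad that avoids a divide-by-zero when both perturbed objective values are essentially zero. Below is a stand-alone sketch of that central-difference check; the quadratic objective and its gradient are illustrative stand-ins for the model's real objective, and all names are made up.

```python
import numpy as np

def objective(x):
    # Toy objective standing in for the model's negative log likelihood.
    return 0.5 * np.sum(x ** 2)

def analytic_gradient(x):
    return x

x = np.random.randn(4) + 1.0   # shift away from zero so gradients are non-trivial
step = 1e-6
df_tolerance = 1e-12

for i in range(x.size):
    xx = x.copy()
    xx[i] += step
    f1 = objective(xx)
    xx[i] -= 2. * step
    f2 = objective(xx)
    # The new guard: only form the relative difference if either value is
    # meaningfully far from zero, otherwise treat the ratio as uninformative.
    if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
        df_ratio = np.abs((f1 - f2) / min(f1, f2))
    else:
        df_ratio = 1.0
    numerical_gradient = (f1 - f2) / (2 * step)
    ratio = numerical_gradient / analytic_gradient(x)[i]
    print(i, ratio, df_ratio < df_tolerance)
```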

View file

@@ -1,5 +1,5 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from param import Param, ObsAr
+from .param import Param, ObsAr
-from parameterized import Parameterized
+from .parameterized import Parameterized

View file

@@ -3,7 +3,9 @@
 import numpy
 from numpy.lib.function_base import vectorize
-from lists_and_dicts import IntArrayDict
+from .lists_and_dicts import IntArrayDict
+from functools import reduce
+from transformations import Transformation
 def extract_properties_to_index(index, props):
     prop_index = dict()
@@ -62,11 +64,14 @@ class ParameterIndexOperations(object):
     def __init__(self, constraints=None):
         self._properties = IntArrayDict()
         if constraints is not None:
-            for t, i in constraints.iteritems():
+            #python 3 fix
+            #for t, i in constraints.iteritems():
+            for t, i in constraints.items():
                 self.add(t, i)
-    def iteritems(self):
-        return self._properties.iteritems()
+    #iteritems has gone in python 3
+    #def iteritems(self):
+    #    return self._properties.iteritems()
     def items(self):
         return self._properties.items()
@@ -75,7 +80,7 @@ class ParameterIndexOperations(object):
         return self._properties.keys()
     def iterproperties(self):
-        return self._properties.iterkeys()
+        return iter(self._properties)
     def shift_right(self, start, size):
         for ind in self.iterindices():
@@ -83,7 +88,7 @@ class ParameterIndexOperations(object):
             ind[toshift] += size
     def shift_left(self, start, size):
-        for v, ind in self.items():
+        for v, ind in list(self.items()):
             todelete = (ind>=start) * (ind<start+size)
             if todelete.size != 0:
                 ind = ind[~todelete]
@@ -101,7 +106,11 @@ class ParameterIndexOperations(object):
         return reduce(lambda a,b: a+b.size, self.iterindices(), 0)
     def iterindices(self):
-        return self._properties.itervalues()
+        try:
+            return self._properties.itervalues()
+        except AttributeError:
+            #Changed this from itervalues to values for Py3 compatibility. It didn't break the test suite.
+            return self._properties.values()
     def indices(self):
         return self._properties.values()
@@ -150,14 +159,18 @@ class ParameterIndexOperations(object):
         return numpy.array([]).astype(int)
     def update(self, parameter_index_view, offset=0):
-        for i, v in parameter_index_view.iteritems():
+        #py3 fix
+        #for i, v in parameter_index_view.iteritems():
+        for i, v in parameter_index_view.items():
             self.add(i, v+offset)
     def copy(self):
         return self.__deepcopy__(None)
     def __deepcopy__(self, memo):
-        return ParameterIndexOperations(dict(self.iteritems()))
+        #py3 fix
+        #return ParameterIndexOperations(dict(self.iteritems()))
+        return ParameterIndexOperations(dict(self.items()))
     def __getitem__(self, prop):
         return self._properties[prop]
@@ -195,22 +208,26 @@ class ParameterIndexOperationsView(object):
     def _filter_index(self, ind):
         return ind[(ind >= self._offset) * (ind < (self._offset + self._size))] - self._offset
-    def iteritems(self):
-        for i, ind in self._param_index_ops.iteritems():
+    #iteritems has gone in python 3. It has been renamed items()
+    def items(self):
+        _items_list = list(self._param_index_ops.items())
+        for i, ind in _items_list:
             ind2 = self._filter_index(ind)
             if ind2.size > 0:
                 yield i, ind2
-    def items(self):
-        return [[i,v] for i,v in self.iteritems()]
+    #Python 3 items() is now implemented as per py2 iteritems
+    #def items(self):
+    #    return [[i,v] for i,v in self.iteritems()]
     def properties(self):
         return [i for i in self.iterproperties()]
     def iterproperties(self):
-        for i, _ in self.iteritems():
+        #py3 fix
+        #for i, _ in self.iteritems():
+        for i, _ in self.items():
             yield i
@@ -230,7 +247,9 @@ class ParameterIndexOperationsView(object):
     def iterindices(self):
-        for _, ind in self.iteritems():
+        #py3 fix
+        #for _, ind in self.iteritems():
+        for _, ind in self.items():
             yield ind
@@ -286,10 +305,14 @@ class ParameterIndexOperationsView(object):
     def __str__(self, *args, **kwargs):
         import pprint
-        return pprint.pformat(dict(self.iteritems()))
+        #py3 fixes
+        #return pprint.pformat(dict(self.iteritems()))
+        return pprint.pformat(dict(self.items()))
     def update(self, parameter_index_view, offset=0):
-        for i, v in parameter_index_view.iteritems():
+        #py3 fixes
+        #for i, v in parameter_index_view.iteritems():
+        for i, v in parameter_index_view.items():
             self.add(i, v+offset)
@@ -297,6 +320,8 @@ class ParameterIndexOperationsView(object):
         return self.__deepcopy__(None)
     def __deepcopy__(self, memo):
-        return ParameterIndexOperations(dict(self.iteritems()))
+        #py3 fix
+        #return ParameterIndexOperations(dict(self.iteritems()))
+        return ParameterIndexOperations(dict(self.items()))
     pass
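
Editor's note: most edits in this file replace dict.iteritems()/iterkeys()/itervalues() with their Python 3 spellings. A small, self-contained sketch of the pattern follows; the constraints dictionary here is made up purely for illustration.

```python
# dict.iteritems() no longer exists on Python 3; dict.items() works on both
# (a list on Python 2, a lazy view on Python 3).
constraints = {'positive': [0, 1], 'fixed': [2]}

for transform, indices in constraints.items():      # instead of .iteritems()
    print(transform, indices)

# When the loop body mutates the dict (as in shift_left above), materialise
# the view first so iteration is not invalidated:
for transform, indices in list(constraints.items()):
    constraints[transform] = [i + 1 for i in indices]
```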

View file

@@ -32,7 +32,7 @@ class ArrayList(list):
             if el is item:
                 return index
             index += 1
-        raise ValueError, "{} is not in list".format(item)
+        raise ValueError("{} is not in list".format(item))
     pass
 class ObserverList(object):
@@ -75,7 +75,7 @@ class ObserverList(object):
     def __str__(self):
         from . import ObsAr, Param
-        from parameter_core import Parameterizable
+        from .parameter_core import Parameterizable
         ret = []
         curr_p = None
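
Editor's note: the first hunk above switches from the Python 2-only `raise Exception, "message"` statement to calling the exception class, which both interpreters accept. A tiny stand-alone example of the portable form (the item value is illustrative):

```python
item = 42
try:
    # The old "raise ValueError, msg" spelling is a SyntaxError on Python 3.
    raise ValueError("{} is not in list".format(item))
except ValueError as e:   # "except ... as e" is also the 2/3-compatible form
    print(e)
```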

View file

@@ -12,7 +12,7 @@ class Observable(object):
     """
     def __init__(self, *args, **kwargs):
         super(Observable, self).__init__()
-        from lists_and_dicts import ObserverList
+        from .lists_and_dicts import ObserverList
         self.observers = ObserverList()
         self._update_on = True

View file

@@ -3,8 +3,8 @@
 import numpy as np
-from parameter_core import Pickleable
+from .parameter_core import Pickleable
-from observable import Observable
+from .observable import Observable
 class ObsAr(np.ndarray, Pickleable, Observable):
     """
@@ -39,7 +39,7 @@ class ObsAr(np.ndarray, Pickleable, Observable):
         return self.view(np.ndarray)
     def copy(self):
-        from lists_and_dicts import ObserverList
+        from .lists_and_dicts import ObserverList
         memo = {}
         memo[id(self)] = self
         memo[id(self.observers)] = ObserverList()

View file

@@ -4,8 +4,9 @@
 import itertools
 import numpy
 np = numpy
-from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
+from .parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
-from observable_array import ObsAr
+from .observable_array import ObsAr
+from functools import reduce
 ###### printing
 __constraints_name__ = "Constraint"
@@ -156,7 +157,7 @@ class Param(Parameterizable, ObsAr):
     #===========================================================================
     @property
     def is_fixed(self):
-        from transformations import __fixed__
+        from .transformations import __fixed__
         return self.constraints[__fixed__].size == self.size
     def _get_original(self, param):
@@ -207,10 +208,14 @@ class Param(Parameterizable, ObsAr):
         return 0
     @property
     def _constraints_str(self):
-        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
+        #py3 fix
+        #return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
+        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.items()))]
     @property
     def _priors_str(self):
-        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))]
+        #py3 fix
+        #return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))]
+        return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.items()))]
     @property
     def _ties_str(self):
         return ['']
@@ -279,7 +284,7 @@ class Param(Parameterizable, ObsAr):
 .tg th{font-family:"Courier New", Courier, monospace !important;font-weight:normal;color:#fff;background-color:#26ADE4;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;}
 .tg .tg-left{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:left;}
 .tg .tg-right{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:right;}
-</style>"""] + ['<table class="tg">'] + [header] + ["<tr><td class=tg-left>{i}</td><td class=tg-right>{x}</td><td class=tg-left>{c}</td><td class=tg-left>{p}</td><td class=tg-left>{t}</td></tr>".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)] + ["</table>"])
+</style>"""] + ['<table class="tg">'] + [header] + ["<tr><td class=tg-left>{i}</td><td class=tg-right>{x}</td><td class=tg-left>{c}</td><td class=tg-left>{p}</td><td class=tg-left>{t}</td></tr>".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in zip(indices, vals, constr_matrix, ties, prirs)] + ["</table>"])
     def __str__(self, constr_matrix=None, indices=None, prirs=None, ties=None, lc=None, lx=None, li=None, lp=None, lt=None, only_name=False):
         filter_ = self._current_slice_
@@ -300,7 +305,7 @@ class Param(Parameterizable, ObsAr):
         if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
         else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
         if not ties: ties = itertools.cycle([''])
-        return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
+        return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in zip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
         # except: return super(Param, self).__str__()
 class ParamConcatenation(object):
@@ -313,7 +318,7 @@ class ParamConcatenation(object):
         See :py:class:`GPy.core.parameter.Param` for more details on constraining.
         """
         # self.params = params
-        from lists_and_dicts import ArrayList
+        from .lists_and_dicts import ArrayList
         self.params = ArrayList([])
         for p in params:
             for p in p.flattened_parameters:
@@ -336,7 +341,9 @@ class ParamConcatenation(object):
                 level += 1
                 parent = parent._parent_
         import operator
-        self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1)))
+        #py3 fix
+        #self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1)))
+        self.parents = map(lambda x: x[0], sorted(parents.items(), key=operator.itemgetter(1)))
     #===========================================================================
     # Get/set items, enable broadcasting
     #===========================================================================
@@ -429,14 +436,14 @@ class ParamConcatenation(object):
         params = self.params
         constr_matrices, ties_matrices, prior_matrices = zip(*map(f, params))
         indices = [p._indices() for p in params]
-        lc = max([p._max_len_names(cm, __constraints_name__) for p, cm in itertools.izip(params, constr_matrices)])
+        lc = max([p._max_len_names(cm, __constraints_name__) for p, cm in zip(params, constr_matrices)])
         lx = max([p._max_len_values() for p in params])
-        li = max([p._max_len_index(i) for p, i in itertools.izip(params, indices)])
+        li = max([p._max_len_index(i) for p, i in zip(params, indices)])
-        lt = max([p._max_len_names(tm, __tie_name__) for p, tm in itertools.izip(params, ties_matrices)])
+        lt = max([p._max_len_names(tm, __tie_name__) for p, tm in zip(params, ties_matrices)])
-        lp = max([p._max_len_names(pm, __constraints_name__) for p, pm in itertools.izip(params, prior_matrices)])
+        lp = max([p._max_len_names(pm, __constraints_name__) for p, pm in zip(params, prior_matrices)])
         strings = []
         start = True
-        for p, cm, i, tm, pm in itertools.izip(params,constr_matrices,indices,ties_matrices,prior_matrices):
+        for p, cm, i, tm, pm in zip(params,constr_matrices,indices,ties_matrices,prior_matrices):
             strings.append(p.__str__(constr_matrix=cm, indices=i, prirs=pm, ties=tm, lc=lc, lx=lx, li=li, lp=lp, lt=lt, only_name=(1-start)))
             start = False
         return "\n".join(strings)

View file

@ -13,11 +13,12 @@ Observable Pattern for patameterization
""" """
from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED from .transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
import numpy as np import numpy as np
import re import re
import logging import logging
from updateable import Updateable from .updateable import Updateable
from functools import reduce
class HierarchyError(Exception): class HierarchyError(Exception):
""" """
@ -36,7 +37,7 @@ def adjust_name_for_printing(name):
name = name.replace("/", "_l_").replace("@", '_at_') name = name.replace("/", "_l_").replace("@", '_at_')
name = name.replace("(", "_of_").replace(")", "") name = name.replace("(", "_of_").replace(")", "")
if re.match(r'^[a-zA-Z_][a-zA-Z0-9-_]*$', name) is None: if re.match(r'^[a-zA-Z_][a-zA-Z0-9-_]*$', name) is None:
raise NameError, "name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name) raise NameError("name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name))
return name return name
return '' return ''
@ -65,13 +66,13 @@ class Parentable(object):
Gets called, when the parent changed, so we can adjust our Gets called, when the parent changed, so we can adjust our
inner attributes according to the new parent. inner attributes according to the new parent.
""" """
raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent" raise NotImplementedError("shouldnt happen, Parentable objects need to be able to change their parent")
def _disconnect_parent(self, *args, **kw): def _disconnect_parent(self, *args, **kw):
""" """
Disconnect this object from its parent Disconnect this object from its parent
""" """
raise NotImplementedError, "Abstract superclass" raise NotImplementedError("Abstract superclass")
@property @property
def _highest_parent_(self): def _highest_parent_(self):
@ -109,7 +110,10 @@ class Pickleable(object):
it properly. it properly.
:param protocol: pickling protocol to use, python-pickle for details. :param protocol: pickling protocol to use, python-pickle for details.
""" """
try: #Py2
import cPickle as pickle import cPickle as pickle
except ImportError: #Py3
import pickle
if isinstance(f, str): if isinstance(f, str):
with open(f, 'wb') as f: with open(f, 'wb') as f:
pickle.dump(self, f, protocol) pickle.dump(self, f, protocol)
@ -138,9 +142,9 @@ class Pickleable(object):
which = self which = self
which.traverse_parents(parents.append) # collect parents which.traverse_parents(parents.append) # collect parents
for p in parents: for p in parents:
if not memo.has_key(id(p)):memo[id(p)] = None # set all parents to be None, so they will not be copied if not id(p) in memo :memo[id(p)] = None # set all parents to be None, so they will not be copied
if not memo.has_key(id(self.gradient)):memo[id(self.gradient)] = None # reset the gradient if not id(self.gradient) in memo:memo[id(self.gradient)] = None # reset the gradient
if not memo.has_key(id(self._fixes_)):memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent if not id(self._fixes_) in memo :memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
copy = copy.deepcopy(self, memo) # and start the copy copy = copy.deepcopy(self, memo) # and start the copy
copy._parent_index_ = None copy._parent_index_ = None
copy._trigger_params_changed() copy._trigger_params_changed()
@ -163,14 +167,16 @@ class Pickleable(object):
'_Cacher_wrap__cachers', # never pickle cachers '_Cacher_wrap__cachers', # never pickle cachers
] ]
dc = dict() dc = dict()
for k,v in self.__dict__.iteritems(): #py3 fix
#for k,v in self.__dict__.iteritems():
for k,v in self.__dict__.items():
if k not in ignore_list: if k not in ignore_list:
dc[k] = v dc[k] = v
return dc return dc
def __setstate__(self, state): def __setstate__(self, state):
self.__dict__.update(state) self.__dict__.update(state)
from lists_and_dicts import ObserverList from .lists_and_dicts import ObserverList
self.observers = ObserverList() self.observers = ObserverList()
self._setup_observers() self._setup_observers()
self._optimizer_copy_transformed = False self._optimizer_copy_transformed = False
@ -214,7 +220,7 @@ class Gradcheckable(Pickleable, Parentable):
Perform the checkgrad on the model. Perform the checkgrad on the model.
TODO: this can be done more efficiently, when doing it inside here TODO: this can be done more efficiently, when doing it inside here
""" """
raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!" raise HierarchyError("This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!")
class Nameable(Gradcheckable): class Nameable(Gradcheckable):
""" """
@ -268,7 +274,7 @@ class Indexable(Nameable, Updateable):
def __init__(self, name, default_constraint=None, *a, **kw): def __init__(self, name, default_constraint=None, *a, **kw):
super(Indexable, self).__init__(name=name, *a, **kw) super(Indexable, self).__init__(name=name, *a, **kw)
self._default_constraint_ = default_constraint self._default_constraint_ = default_constraint
from index_operations import ParameterIndexOperations from .index_operations import ParameterIndexOperations
self.constraints = ParameterIndexOperations() self.constraints = ParameterIndexOperations()
self.priors = ParameterIndexOperations() self.priors = ParameterIndexOperations()
if self._default_constraint_ is not None: if self._default_constraint_ is not None:
@ -310,7 +316,7 @@ class Indexable(Nameable, Updateable):
that is an int array, containing the indexes for the flattened that is an int array, containing the indexes for the flattened
param inside this parameterized logic. param inside this parameterized logic.
""" """
from param import ParamConcatenation from .param import ParamConcatenation
if isinstance(param, ParamConcatenation): if isinstance(param, ParamConcatenation):
return np.hstack((self._raveled_index_for(p) for p in param.params)) return np.hstack((self._raveled_index_for(p) for p in param.params))
return param._raveled_index() + self._offset_for(param) return param._raveled_index() + self._offset_for(param)
@ -407,7 +413,7 @@ class Indexable(Nameable, Updateable):
repriorized = self.unset_priors() repriorized = self.unset_priors()
self._add_to_index_operations(self.priors, repriorized, prior, warning) self._add_to_index_operations(self.priors, repriorized, prior, warning)
from domains import _REAL, _POSITIVE, _NEGATIVE from .domains import _REAL, _POSITIVE, _NEGATIVE
if prior.domain is _POSITIVE: if prior.domain is _POSITIVE:
self.constrain_positive(warning) self.constrain_positive(warning)
elif prior.domain is _NEGATIVE: elif prior.domain is _NEGATIVE:
@ -426,7 +432,9 @@ class Indexable(Nameable, Updateable):
"""evaluate the prior""" """evaluate the prior"""
if self.priors.size > 0: if self.priors.size > 0:
x = self.param_array x = self.param_array
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0) #py3 fix
#return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
return 0. return 0.
def _log_prior_gradients(self): def _log_prior_gradients(self):
@ -434,7 +442,9 @@ class Indexable(Nameable, Updateable):
if self.priors.size > 0: if self.priors.size > 0:
x = self.param_array x = self.param_array
ret = np.zeros(x.size) ret = np.zeros(x.size)
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()] #py3 fix
#[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
return ret return ret
return 0. return 0.
@ -536,7 +546,7 @@ class Indexable(Nameable, Updateable):
update the constraints and priors view, so that update the constraints and priors view, so that
constraining is automized for the parent. constraining is automized for the parent.
""" """
from index_operations import ParameterIndexOperationsView from .index_operations import ParameterIndexOperationsView
#if getattr(self, "_in_init_"): #if getattr(self, "_in_init_"):
#import ipdb;ipdb.set_trace() #import ipdb;ipdb.set_trace()
#self.constraints.update(param.constraints, start) #self.constraints.update(param.constraints, start)
@ -558,7 +568,7 @@ class Indexable(Nameable, Updateable):
""" """
if warning and reconstrained.size > 0: if warning and reconstrained.size > 0:
# TODO: figure out which parameters have changed and only print those # TODO: figure out which parameters have changed and only print those
print "WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name) print("WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name))
index = self._raveled_index() index = self._raveled_index()
which.add(what, index) which.add(what, index)
return index return index
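
The print conversions in this and later hunks all follow the same recipe; a minimal sketch (the parameter name is made up), which also parses on Python 2 thanks to the __future__ import:

    from __future__ import print_function   # lets Python 2 accept the function form too

    name = "rbf.variance"   # hypothetical parameter name
    print("WARNING: reconstraining parameters {}".format(name))
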
@ -571,7 +581,7 @@ class Indexable(Nameable, Updateable):
if len(transforms) == 0: if len(transforms) == 0:
transforms = which.properties() transforms = which.properties()
removed = np.empty((0,), dtype=int) removed = np.empty((0,), dtype=int)
for t in transforms: for t in list(transforms):
unconstrained = which.remove(t, self._raveled_index()) unconstrained = which.remove(t, self._raveled_index())
removed = np.union1d(removed, unconstrained) removed = np.union1d(removed, unconstrained)
if t is __fixed__: if t is __fixed__:
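
The list(...) wrapper added above matters on Python 3, where removing entries from a container while iterating a live view over it raises a RuntimeError; the loop therefore iterates over a snapshot (assuming which.properties() exposes such a view over the structure that which.remove() mutates). The general behaviour, shown with a plain dict:

    constraints = {"+ve": [0, 1], "fixed": [2]}

    for t in list(constraints):   # snapshot of the keys
        constraints.pop(t)        # safe: we are not iterating the live view
    print(constraints)            # {}

    # Iterating `constraints` directly while popping would raise
    # "RuntimeError: dictionary changed size during iteration" on Python 3.
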
@ -612,7 +622,9 @@ class OptimizationHandlable(Indexable):
if not self._optimizer_copy_transformed: if not self._optimizer_copy_transformed:
self._optimizer_copy_.flat = self.param_array.flat self._optimizer_copy_.flat = self.param_array.flat
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__] #py3 fix
#[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.items() if c != __fixed__]
if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()): if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
fixes = np.ones(self.size).astype(bool) fixes = np.ones(self.size).astype(bool)
fixes[self.constraints[__fixed__]] = FIXED fixes[self.constraints[__fixed__]] = FIXED
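
For context on the finv calls above: the optimizer copy holds unconstrained values, and each constraint's transform maps between that space and the constrained model values. A short round-trip sketch with the Logexp (positivity) transform, assuming this GPy revision is importable:

    import numpy as np
    from GPy.core.parameterization.transformations import Logexp

    t = Logexp()
    theta = np.array([0.5, 2.0])          # constrained (positive) model values
    opt = t.finv(theta)                   # what the optimizer actually works on
    print(np.allclose(t.f(opt), theta))   # True: f undoes finv
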
@ -641,21 +653,25 @@ class OptimizationHandlable(Indexable):
if f is None: if f is None:
self.param_array.flat = p self.param_array.flat = p
[np.put(self.param_array, ind, c.f(self.param_array.flat[ind])) [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
for c, ind in self.constraints.iteritems() if c != __fixed__] #py3 fix
#for c, ind in self.constraints.iteritems() if c != __fixed__]
for c, ind in self.constraints.items() if c != __fixed__]
else: else:
self.param_array.flat[f] = p self.param_array.flat[f] = p
[np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]])) [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
for c, ind in self.constraints.iteritems() if c != __fixed__] #py3 fix
#for c, ind in self.constraints.iteritems() if c != __fixed__]
for c, ind in self.constraints.items() if c != __fixed__]
#self._highest_parent_.tie.propagate_val() #self._highest_parent_.tie.propagate_val()
self._optimizer_copy_transformed = False self._optimizer_copy_transformed = False
self.trigger_update() self.trigger_update()
def _get_params_transformed(self): def _get_params_transformed(self):
    raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer_array instead!" raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer_array instead!")
# #
def _set_params_transformed(self, p): def _set_params_transformed(self, p):
    raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer_array instead!" raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer_array instead!")
def _trigger_params_changed(self, trigger_parent=True): def _trigger_params_changed(self, trigger_parent=True):
""" """
@ -680,7 +696,9 @@ class OptimizationHandlable(Indexable):
constraint to it. constraint to it.
""" """
self._highest_parent_.tie.collate_gradient() self._highest_parent_.tie.collate_gradient()
[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] #py3 fix
#[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_] if self._has_fixes(): return g[self._fixes_]
return g return g
@ -690,7 +708,9 @@ class OptimizationHandlable(Indexable):
constraint to it. constraint to it.
""" """
self._highest_parent_.tie.collate_gradient() self._highest_parent_.tie.collate_gradient()
[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__] #py3 fix
#[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__]
if self._has_fixes(): return g[self._fixes_] if self._has_fixes(): return g[self._fixes_]
return g return g
@ -701,7 +721,7 @@ class OptimizationHandlable(Indexable):
Return the number of parameters of this parameter_handle. Return the number of parameters of this parameter_handle.
Param objects will always return 0. Param objects will always return 0.
""" """
    raise NotImplemented, "Abstract, please implement in respective classes" raise NotImplementedError("Abstract, please implement in respective classes")
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True): def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
""" """
@ -750,7 +770,9 @@ class OptimizationHandlable(Indexable):
self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...) self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
# now draw from prior where possible # now draw from prior where possible
x = self.param_array.copy() x = self.param_array.copy()
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None] #Py3 fix
#[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.items() if not p is None]
unfixlist = np.ones((self.size,),dtype=np.bool) unfixlist = np.ones((self.size,),dtype=np.bool)
unfixlist[self.constraints[__fixed__]] = False unfixlist[self.constraints[__fixed__]] = False
self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist] self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist]
@ -947,7 +969,7 @@ class Parameterizable(OptimizationHandlable):
self._add_parameter_name(param, ignore_added_names) self._add_parameter_name(param, ignore_added_names)
# and makes sure to not delete programmatically added parameters # and makes sure to not delete programmatically added parameters
for other in self.parameters[::-1]: for other in self.parameters[::-1]:
if other is not param and other.name.startswith(param.name): if other is not param and other.name == param.name:
warn_and_retry(param, _name_digit.match(other.name)) warn_and_retry(param, _name_digit.match(other.name))
return return
if pname not in dir(self): if pname not in dir(self):

View file

@ -1,15 +1,15 @@
# Copyright (c) 2014, Max Zwiessele, James Hensman # Copyright (c) 2014, Max Zwiessele, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import six # For metaclass support in Python 2 and 3 simultaneously
import numpy; np = numpy import numpy; np = numpy
import itertools import itertools
from re import compile, _pattern_type from re import compile, _pattern_type
from param import ParamConcatenation from .param import ParamConcatenation
from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging import logging
from GPy.core.parameterization.index_operations import ParameterIndexOperationsView from index_operations import ParameterIndexOperationsView
logger = logging.getLogger("parameters changed meta") logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type): class ParametersChangedMeta(type):
@ -27,6 +27,7 @@ class ParametersChangedMeta(type):
self.parameters_changed() self.parameters_changed()
return self return self
@six.add_metaclass(ParametersChangedMeta)
class Parameterized(Parameterizable): class Parameterized(Parameterizable):
""" """
Parameterized class Parameterized class
@ -73,7 +74,9 @@ class Parameterized(Parameterizable):
# Metaclass for parameters changed after init. # Metaclass for parameters changed after init.
# This makes sure, that parameters changed will always be called after __init__ # This makes sure, that parameters changed will always be called after __init__
# **Never** call parameters_changed() yourself # **Never** call parameters_changed() yourself
    __metaclass__ = ParametersChangedMeta #This is ignored in Python 3 -- the metaclass has to be declared in the class definition instead.
#__metaclass__ = ParametersChangedMeta
#The six module is used to support both Python 2 and 3 simultaneously
#=========================================================================== #===========================================================================
def __init__(self, name=None, parameters=[], *a, **kw): def __init__(self, name=None, parameters=[], *a, **kw):
super(Parameterized, self).__init__(name=name, *a, **kw) super(Parameterized, self).__init__(name=name, *a, **kw)
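
The @six.add_metaclass decorator above is what keeps the post-__init__ hook working on both interpreters, since Python 3 silently ignores a __metaclass__ class attribute. A stripped-down sketch of the same pattern (the hook body here just prints; in GPy it calls parameters_changed()):

    import six

    class PostInitMeta(type):
        def __call__(cls, *args, **kwargs):
            obj = super(PostInitMeta, cls).__call__(*args, **kwargs)
            print("hook runs after __init__")   # GPy triggers parameters_changed() at this point
            return obj

    @six.add_metaclass(PostInitMeta)
    class Example(object):
        def __init__(self):
            self.ready = True

    Example()   # prints the hook message once construction has finished
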
@ -131,7 +134,7 @@ class Parameterized(Parameterizable):
if param.has_parent(): if param.has_parent():
def visit(parent, self): def visit(parent, self):
if parent is self: if parent is self:
raise HierarchyError, "You cannot add a parameter twice into the hierarchy" raise HierarchyError("You cannot add a parameter twice into the hierarchy")
param.traverse_parents(visit, self) param.traverse_parents(visit, self)
param._parent_.unlink_parameter(param) param._parent_.unlink_parameter(param)
# make sure the size is set # make sure the size is set
@ -173,7 +176,7 @@ class Parameterized(Parameterizable):
self._highest_parent_._connect_fixes() self._highest_parent_._connect_fixes()
else: else:
raise HierarchyError, """Parameter exists already, try making a copy""" raise HierarchyError("""Parameter exists already, try making a copy""")
def link_parameters(self, *parameters): def link_parameters(self, *parameters):
@ -189,9 +192,9 @@ class Parameterized(Parameterizable):
""" """
if not param in self.parameters: if not param in self.parameters:
try: try:
raise RuntimeError, "{} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name) raise RuntimeError("{} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name))
except AttributeError: except AttributeError:
raise RuntimeError, "{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param)) raise RuntimeError("{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param)))
start = sum([p.size for p in self.parameters[:param._parent_index_]]) start = sum([p.size for p in self.parameters[:param._parent_index_]])
self._remove_parameter_name(param) self._remove_parameter_name(param)
@ -215,9 +218,9 @@ class Parameterized(Parameterizable):
self._highest_parent_._notify_parent_change() self._highest_parent_._notify_parent_change()
def add_parameter(self, *args, **kwargs): def add_parameter(self, *args, **kwargs):
raise DeprecationWarning, "add_parameter was renamed to link_parameter to avoid confusion of setting variables, use link_parameter instead" raise DeprecationWarning("add_parameter was renamed to link_parameter to avoid confusion of setting variables, use link_parameter instead")
def remove_parameter(self, *args, **kwargs): def remove_parameter(self, *args, **kwargs):
raise DeprecationWarning, "remove_parameter was renamed to unlink_parameter to avoid confusion of setting variables, use unlink_parameter instead" raise DeprecationWarning("remove_parameter was renamed to unlink_parameter to avoid confusion of setting variables, use unlink_parameter instead")
def _connect_parameters(self, ignore_added_names=False): def _connect_parameters(self, ignore_added_names=False):
# connect parameterlist to this parameterized object # connect parameterlist to this parameterized object
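
The raise rewrites in these hunks are purely syntactic: raise Exc, "msg" is a SyntaxError on Python 3, whereas calling the exception class parses on both interpreters. Minimal sketch:

    try:
        raise DeprecationWarning("add_parameter was renamed to link_parameter")
    except DeprecationWarning as e:   # `except ... as e` is likewise the only spelling Python 3 accepts
        print(e)
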
@ -237,7 +240,7 @@ class Parameterized(Parameterizable):
self._param_slices_ = [] self._param_slices_ = []
for i, p in enumerate(self.parameters): for i, p in enumerate(self.parameters):
if not p.param_array.flags['C_CONTIGUOUS']: if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS" raise ValueError("This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS")
p._parent_ = self p._parent_ = self
p._parent_index_ = i p._parent_index_ = i
@ -268,7 +271,7 @@ class Parameterized(Parameterizable):
""" """
if not isinstance(regexp, _pattern_type): regexp = compile(regexp) if not isinstance(regexp, _pattern_type): regexp = compile(regexp)
found_params = [] found_params = []
for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters): for n, p in zip(self.parameter_names(False, False, True), self.flattened_parameters):
if regexp.match(n) is not None: if regexp.match(n) is not None:
found_params.append(p) found_params.append(p)
return found_params return found_params
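
itertools.izip and xrange (converted here and in the priors.py and tie.py hunks below) simply do not exist on Python 3; zip and range are already lazy there, so the builtins are the portable spelling. Tiny sketch with made-up names:

    names = ["rbf.variance", "rbf.lengthscale"]   # hypothetical parameter names
    values = [1.0, 2.5]

    for n, v in zip(names, values):
        print(n, v)
    for j in range(len(values)):
        print(j, values[j])
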
@ -279,7 +282,7 @@ class Parameterized(Parameterizable):
else: else:
if paramlist is None: if paramlist is None:
paramlist = self.grep_param_names(name) paramlist = self.grep_param_names(name)
if len(paramlist) < 1: raise AttributeError, name if len(paramlist) < 1: raise AttributeError(name)
if len(paramlist) == 1: if len(paramlist) == 1:
if isinstance(paramlist[-1], Parameterized): if isinstance(paramlist[-1], Parameterized):
paramlist = paramlist[-1].flattened_parameters paramlist = paramlist[-1].flattened_parameters
@ -295,7 +298,7 @@ class Parameterized(Parameterizable):
try: try:
self.param_array[name] = value self.param_array[name] = value
except: except:
raise ValueError, "Setting by slice or index only allowed with array-like" raise ValueError("Setting by slice or index only allowed with array-like")
self.trigger_update() self.trigger_update()
else: else:
try: param = self.__getitem__(name, paramlist) try: param = self.__getitem__(name, paramlist)
@ -325,7 +328,7 @@ class Parameterized(Parameterizable):
self._notify_parent_change() self._notify_parent_change()
self.parameters_changed() self.parameters_changed()
except Exception as e: except Exception as e:
print "WARNING: caught exception {!s}, trying to continue".format(e) print("WARNING: caught exception {!s}, trying to continue".format(e))
def copy(self, memo=None): def copy(self, memo=None):
if memo is None: if memo is None:
@ -379,7 +382,7 @@ class Parameterized(Parameterizable):
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]]) pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
format_spec = "<tr><td class=tg-left>{{name:<{0}s}}</td><td class=tg-right>{{desc:>{1}s}}</td><td class=tg-left>{{const:^{2}s}}</td><td class=tg-left>{{pri:^{3}s}}</td><td class=tg-left>{{t:^{4}s}}</td></tr>".format(nl, sl, cl, pl, tl) format_spec = "<tr><td class=tg-left>{{name:<{0}s}}</td><td class=tg-right>{{desc:>{1}s}}</td><td class=tg-left>{{const:^{2}s}}</td><td class=tg-left>{{pri:^{3}s}}</td><td class=tg-left>{{t:^{4}s}}</td></tr>".format(nl, sl, cl, pl, tl)
to_print = [] to_print = []
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs): for n, d, c, t, p in zip(names, desc, constrs, ts, prirs):
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p)) to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3) sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
if header: if header:
@ -414,7 +417,7 @@ class Parameterized(Parameterizable):
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]]) pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
format_spec = " \033[1m{{name:<{0}s}}\033[0;0m | {{desc:>{1}s}} | {{const:^{2}s}} | {{pri:^{3}s}} | {{t:^{4}s}}".format(nl, sl, cl, pl, tl) format_spec = " \033[1m{{name:<{0}s}}\033[0;0m | {{desc:>{1}s}} | {{const:^{2}s}} | {{pri:^{3}s}} | {{t:^{4}s}}".format(nl, sl, cl, pl, tl)
to_print = [] to_print = []
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs): for n, d, c, t, p in zip(names, desc, constrs, ts, prirs):
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p)) to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3) sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
if header: if header:

View file

@ -5,7 +5,7 @@
import numpy as np import numpy as np
from scipy.special import gammaln, digamma from scipy.special import gammaln, digamma
from ...util.linalg import pdinv from ...util.linalg import pdinv
from domains import _REAL, _POSITIVE from .domains import _REAL, _POSITIVE
import warnings import warnings
import weakref import weakref
@ -15,7 +15,11 @@ class Prior(object):
_instance = None _instance = None
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
if not cls._instance or cls._instance.__class__ is not cls: if not cls._instance or cls._instance.__class__ is not cls:
cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs) newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
cls._instance = newfunc(cls)
else:
cls._instance = newfunc(cls, *args, **kwargs)
return cls._instance return cls._instance
def pdf(self, x): def pdf(self, x):
@ -52,7 +56,11 @@ class Gaussian(Prior):
for instance in cls._instances: for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma: if instance().mu == mu and instance().sigma == sigma:
return instance() return instance()
o = super(Prior, cls).__new__(cls, mu, sigma) newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, sigma)
cls._instances.append(weakref.ref(o)) cls._instances.append(weakref.ref(o))
return cls._instances[-1]() return cls._instances[-1]()
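
The newfunc guard introduced for Prior, Gaussian, LogGaussian, Gamma and (later) Logistic is needed because, when __new__ is overridden as it is here, forwarding constructor arguments to object.__new__ raises a TypeError on Python 3; the arguments are therefore only passed on when a parent class supplies its own __new__ that accepts them. A self-contained sketch of the same singleton pattern:

    class Singleton(object):
        _instance = None

        def __new__(cls, *args, **kwargs):
            if cls._instance is None or cls._instance.__class__ is not cls:
                newfunc = super(Singleton, cls).__new__
                if newfunc is object.__new__:
                    cls._instance = newfunc(cls)                   # must not forward the arguments
                else:
                    cls._instance = newfunc(cls, *args, **kwargs)
            return cls._instance

        def __init__(self, mu=0.0):
            self.mu = mu

    a = Singleton(mu=1.0)
    b = Singleton(mu=2.0)
    print(a is b)   # True: the same instance is handed back
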
@ -140,7 +148,11 @@ class LogGaussian(Gaussian):
for instance in cls._instances: for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma: if instance().mu == mu and instance().sigma == sigma:
return instance() return instance()
o = super(Prior, cls).__new__(cls, mu, sigma) newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, sigma)
cls._instances.append(weakref.ref(o)) cls._instances.append(weakref.ref(o))
return cls._instances[-1]() return cls._instances[-1]()
@ -258,7 +270,11 @@ class Gamma(Prior):
for instance in cls._instances: for instance in cls._instances:
if instance().a == a and instance().b == b: if instance().a == a and instance().b == b:
return instance() return instance()
o = super(Prior, cls).__new__(cls, a, b) newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, a, b)
cls._instances.append(weakref.ref(o)) cls._instances.append(weakref.ref(o))
return cls._instances[-1]() return cls._instances[-1]()
@ -398,7 +414,7 @@ class DGPLVM_KFDA(Prior):
def compute_cls(self, x): def compute_cls(self, x):
cls = {} cls = {}
# Appending each data point to its proper class # Appending each data point to its proper class
for j in xrange(self.datanum): for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j]) class_label = self.get_class_label(self.lbl[j])
if class_label not in cls: if class_label not in cls:
cls[class_label] = [] cls[class_label] = []
@ -537,7 +553,7 @@ class DGPLVM(Prior):
def compute_cls(self, x): def compute_cls(self, x):
cls = {} cls = {}
# Appending each data point to its proper class # Appending each data point to its proper class
for j in xrange(self.datanum): for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j]) class_label = self.get_class_label(self.lbl[j])
if class_label not in cls: if class_label not in cls:
cls[class_label] = [] cls[class_label] = []
@ -556,7 +572,7 @@ class DGPLVM(Prior):
# Adding data points as tuple to the dictionary so that we can access indices # Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x): def compute_indices(self, x):
data_idx = {} data_idx = {}
for j in xrange(self.datanum): for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j]) class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx: if class_label not in data_idx:
data_idx[class_label] = [] data_idx[class_label] = []
@ -575,7 +591,7 @@ class DGPLVM(Prior):
else: else:
lst_idx = [] lst_idx = []
# Here we put indices of each class in to the list called lst_idx_all # Here we put indices of each class in to the list called lst_idx_all
for m in xrange(len(data_idx[i])): for m in range(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0]) lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx) lst_idx_all.append(lst_idx)
return lst_idx_all return lst_idx_all
@ -611,7 +627,7 @@ class DGPLVM(Prior):
# pdb.set_trace() # pdb.set_trace()
# Calculating Bi # Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim) B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in xrange(self.datanum): for k in range(self.datanum):
for i in data_idx: for i in data_idx:
N_i = float(len(data_idx[i])) N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]: if k in lst_idx_all[i]:
@ -712,8 +728,11 @@ class DGPLVM(Prior):
return 'DGPLVM_prior_Raq' return 'DGPLVM_prior_Raq'
# ******************************************
class DGPLVM_T(Prior): from .. import Parameterized
from .. import Param
class DGPLVM_Lamda(Prior, Parameterized):
""" """
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel. Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
@ -734,16 +753,18 @@ class DGPLVM_T(Prior):
# cls._instances.append(weakref.ref(o)) # cls._instances.append(weakref.ref(o))
# return cls._instances[-1]() # return cls._instances[-1]()
def __init__(self, sigma2, lbl, x_shape, vec): def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
super(DGPLVM_Lamda, self).__init__(name=name)
self.sigma2 = sigma2 self.sigma2 = sigma2
# self.x = x # self.x = x
self.lbl = lbl self.lbl = lbl
self.lamda = lamda
self.classnum = lbl.shape[1] self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0] self.datanum = lbl.shape[0]
self.x_shape = x_shape self.x_shape = x_shape
self.dim = x_shape[1] self.dim = x_shape[1]
self.vec = vec self.lamda = Param('lamda', np.diag(lamda))
self.link_parameter(self.lamda)
def get_class_label(self, y): def get_class_label(self, y):
for idx, v in enumerate(y): for idx, v in enumerate(y):
@ -764,11 +785,11 @@ class DGPLVM_T(Prior):
return cls return cls
# This function computes mean of each class. The mean is calculated through each dimension # This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls, vec): def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim)) M_i = np.zeros((self.classnum, self.dim))
for i in cls: for i in cls:
# Mean of each class # Mean of each class
class_i = np.multiply(cls[i],vec) class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0) M_i[i] = np.mean(class_i, axis=0)
return M_i return M_i
@ -822,7 +843,7 @@ class DGPLVM_T(Prior):
# Calculating beta and Bi for Sb # Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all): def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
# import pdb import pdb
# pdb.set_trace() # pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim)) B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim)) Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
@ -874,9 +895,256 @@ class DGPLVM_T(Prior):
# This function calculates log of our prior # This function calculates log of our prior
def lnpdf(self, x): def lnpdf(self, x):
x = x.reshape(self.x_shape) x = x.reshape(self.x_shape)
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x) cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0) M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls, self.vec) M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x)
lst_idx_all = self.compute_listIndices(data_idx)
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
W_i = self.compute_wj(data_idx, M_i)
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
# Calculating DJ/DXk
DJ_Dxk = 2 * (
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
Sig_alpha_W_i))
# Calculating derivative of the log of the prior
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx)
        # Because of GPy conventions we need to transpose the gradient so that it gets the same shape as our x matrix (denominator layout!!!)
DPxprim_Dx = DPxprim_Dx.T
DPxprim_Dlamda = DPx_Dx.dot(x)
        # Because of GPy conventions we need to transpose the gradient here as well (denominator layout!!!)
DPxprim_Dlamda = DPxprim_Dlamda.T
self.lamda.gradient = np.diag(DPxprim_Dlamda)
# print DPxprim_Dx
return DPxprim_Dx
# def frb(self, x):
# from functools import partial
# from GPy.models import GradientChecker
# f = partial(self.lnpdf)
# df = partial(self.lnpdf_grad)
# grad = GradientChecker(f, df, x, 'X')
# grad.checkgrad(verbose=1)
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior_Raq_Lamda'
# ******************************************
class DGPLVM_T(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
:param sigma2: constant
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, sigma2, lbl, x_shape, vec):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
self.vec = vec
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
return cls
# This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
# class_i = np.multiply(cls[i],vec)
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
# Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x):
data_idx = {}
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
t = (j, x[j])
data_idx[class_label].append(t)
return data_idx
# Adding indices to the list so we can access whole the indices
def compute_listIndices(self, data_idx):
lst_idx = []
lst_idx_all = []
for i in data_idx:
if len(lst_idx) == 0:
pass
                #Do nothing, because this is the first time the list is created, so it is still empty
else:
lst_idx = []
# Here we put indices of each class in to the list called lst_idx_all
for m in range(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx)
return lst_idx_all
# This function calculates between classes variances
def compute_Sb(self, cls, M_i, M_0):
Sb = np.zeros((self.dim, self.dim))
for i in cls:
B = (M_i[i] - M_0).reshape(self.dim, 1)
B_trans = B.transpose()
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
return Sb
# This function calculates within classes variances
def compute_Sw(self, cls, M_i):
Sw = np.zeros((self.dim, self.dim))
for i in cls:
N_i = float(len(cls[i]))
W_WT = np.zeros((self.dim, self.dim))
for xk in cls[i]:
W = (xk - M_i[i])
W_WT += np.outer(W, W)
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
return Sw
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
for i in data_idx:
# pdb.set_trace()
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in range(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
beta = (float(1) / N_i) - (float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
else:
beta = -(float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
return Sig_beta_B_i_all
# Calculating W_j s separately so we can access all the W_j s anytime
def compute_wj(self, data_idx, M_i):
W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
xj = tpl[1]
j = tpl[0]
W_i[j] = (xj - M_i[i])
return W_i
# Calculating alpha and Wj for Sw
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
k = tpl[0]
for j in lst_idx_all[i]:
if k == j:
alpha = 1 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
else:
alpha = 0 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
return Sig_alpha_W_i
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0) Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i) Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1)) # Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
@ -889,9 +1157,12 @@ class DGPLVM_T(Prior):
# This function calculates derivative of the log of prior function # This function calculates derivative of the log of prior function
def lnpdf_grad(self, x): def lnpdf_grad(self, x):
x = x.reshape(self.x_shape) x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x) cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0) M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls, self.vec) M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0) Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i) Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x) data_idx = self.compute_indices(x)
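
The two discriminative priors added above (DGPLVM_Lamda, which learns a per-dimension weighting as a linked Param, and the re-listed DGPLVM_T, which takes a fixed projection vec) both evaluate on the flattened latent matrix. A rough, untested usage sketch for DGPLVM_T with synthetic one-hot labels; the argument names follow the __init__ shown above, everything else is made up:

    import numpy as np
    from GPy.core.parameterization.priors import DGPLVM_T   # assuming this revision is on the path

    N, Q = 30, 2
    labels = np.zeros((N, 2))        # one-hot class labels for two classes
    labels[:15, 0] = 1
    labels[15:, 1] = 1

    prior = DGPLVM_T(sigma2=0.01, lbl=labels, x_shape=(N, Q), vec=np.eye(Q))
    X = np.random.randn(N, Q)
    print(prior.lnpdf(X.ravel()))    # scalar log-prior of this latent layout
    g = prior.lnpdf_grad(X.ravel())  # gradient with respect to the latent coordinates
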

View file

@ -2,8 +2,8 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from parameterized import Parameterized from .parameterized import Parameterized
from param import Param from .param import Param
class Remapping(Parameterized): class Remapping(Parameterized):
def mapping(self): def mapping(self):
@ -98,7 +98,7 @@ class Tie(Parameterized):
if np.all(self.label_buf[idx]==0): if np.all(self.label_buf[idx]==0):
# None of p has been tied before. # None of p has been tied before.
tie_idx = self._expandTieParam(1) tie_idx = self._expandTieParam(1)
print tie_idx print(tie_idx)
tie_id = self.label_buf.max()+1 tie_id = self.label_buf.max()+1
self.label_buf[tie_idx] = tie_id self.label_buf[tie_idx] = tie_id
else: else:
@ -185,18 +185,18 @@ class Tie(Parameterized):
def _check_change(self): def _check_change(self):
changed = False changed = False
if self.tied_param is not None: if self.tied_param is not None:
for i in xrange(self.tied_param.size): for i in range(self.tied_param.size):
b0 = self.label_buf==self.label_buf[self.buf_idx[i]] b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
b = self._highest_parent_.param_array[b0]!=self.tied_param[i] b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
if b.sum()==0: if b.sum()==0:
print 'XXX' print('XXX')
continue continue
elif b.sum()==1: elif b.sum()==1:
print '!!!' print('!!!')
val = self._highest_parent_.param_array[b0][b][0] val = self._highest_parent_.param_array[b0][b][0]
self._highest_parent_.param_array[b0] = val self._highest_parent_.param_array[b0] = val
else: else:
print '@@@' print('@@@')
self._highest_parent_.param_array[b0] = self.tied_param[i] self._highest_parent_.param_array[b0] = self.tied_param[i]
changed = True changed = True
return changed return changed
@ -212,11 +212,11 @@ class Tie(Parameterized):
if self.tied_param is not None: if self.tied_param is not None:
self.tied_param.gradient = 0. self.tied_param.gradient = 0.
[np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum()) [np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum())
for i in xrange(self.tied_param.size)] for i in range(self.tied_param.size)]
def propagate_val(self): def propagate_val(self):
if self.tied_param is not None: if self.tied_param is not None:
for i in xrange(self.tied_param.size): for i in range(self.tied_param.size):
self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i] self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i]

View file

@ -3,7 +3,7 @@
import numpy as np import numpy as np
from domains import _POSITIVE,_NEGATIVE, _BOUNDED from .domains import _POSITIVE,_NEGATIVE, _BOUNDED
import weakref import weakref
import sys import sys
@ -72,7 +72,7 @@ class Logexp(Transformation):
return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f))) return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f)))
def initialize(self, f): def initialize(self, f):
if np.any(f < 0.): if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints" print("Warning: changing parameters to satisfy constraints")
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '+ve' return '+ve'
@ -130,7 +130,7 @@ class NormalTheta(Transformation):
def initialize(self, f): def initialize(self, f):
if np.any(f[self.var_indices] < 0.): if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints" print("Warning: changing parameters to satisfy constraints")
f[self.var_indices] = np.abs(f[self.var_indices]) f[self.var_indices] = np.abs(f[self.var_indices])
return f return f
@ -177,7 +177,7 @@ class NormalNaturalAntti(NormalTheta):
def initialize(self, f): def initialize(self, f):
if np.any(f[self.var_indices] < 0.): if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints" print("Warning: changing parameters to satisfy constraints")
f[self.var_indices] = np.abs(f[self.var_indices]) f[self.var_indices] = np.abs(f[self.var_indices])
return f return f
@ -220,7 +220,7 @@ class NormalEta(Transformation):
def initialize(self, f): def initialize(self, f):
if np.any(f[self.var_indices] < 0.): if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints" print("Warning: changing parameters to satisfy constraints")
f[self.var_indices] = np.abs(f[self.var_indices]) f[self.var_indices] = np.abs(f[self.var_indices])
return f return f
@ -360,7 +360,7 @@ class LogexpNeg(Transformation):
return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f))) return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f)))
def initialize(self, f): def initialize(self, f):
if np.any(f < 0.): if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints" print("Warning: changing parameters to satisfy constraints")
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '+ve' return '+ve'
@ -412,7 +412,7 @@ class LogexpClipped(Logexp):
return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf) return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf)
def initialize(self, f): def initialize(self, f):
if np.any(f < 0.): if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints" print("Warning: changing parameters to satisfy constraints")
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '+ve_c' return '+ve_c'
@ -428,7 +428,7 @@ class Exponent(Transformation):
return np.einsum('i,i->i', df, f) return np.einsum('i,i->i', df, f)
def initialize(self, f): def initialize(self, f):
if np.any(f < 0.): if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints" print("Warning: changing parameters to satisfy constraints")
return np.abs(f) return np.abs(f)
def __str__(self): def __str__(self):
return '+ve' return '+ve'
@ -468,7 +468,11 @@ class Logistic(Transformation):
for instance in cls._instances: for instance in cls._instances:
if instance().lower == lower and instance().upper == upper: if instance().lower == lower and instance().upper == upper:
return instance() return instance()
o = super(Transformation, cls).__new__(cls, lower, upper, *args, **kwargs) newfunc = super(Transformation, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, lower, upper, *args, **kwargs)
cls._instances.append(weakref.ref(o)) cls._instances.append(weakref.ref(o))
return cls._instances[-1]() return cls._instances[-1]()
def __init__(self, lower, upper): def __init__(self, lower, upper):
@ -486,7 +490,7 @@ class Logistic(Transformation):
return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference) return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference)
def initialize(self, f): def initialize(self, f):
if np.any(np.logical_or(f < self.lower, f > self.upper)): if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints" print("Warning: changing parameters to satisfy constraints")
#return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f) #return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
#FIXME: Max, zeros_like right? #FIXME: Max, zeros_like right?
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f) return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)

View file

@ -3,7 +3,7 @@ Created on 11 Nov 2014
@author: maxz @author: maxz
''' '''
from observable import Observable from .observable import Observable
class Updateable(Observable): class Updateable(Observable):
@ -35,7 +35,7 @@ class Updateable(Observable):
self.trigger_update() self.trigger_update()
def toggle_update(self): def toggle_update(self):
print "deprecated: toggle_update was renamed to update_toggle for easier access" print("deprecated: toggle_update was renamed to update_toggle for easier access")
self.update_toggle() self.update_toggle()
def update_toggle(self): def update_toggle(self):
self.update_model(not self.update_model()) self.update_model(not self.update_model())

View file

@ -5,9 +5,9 @@ Created on 6 Nov 2013
''' '''
import numpy as np import numpy as np
from parameterized import Parameterized from .parameterized import Parameterized
from param import Param from .param import Param
from transformations import Logexp, Logistic,__fixed__ from .transformations import Logexp, Logistic,__fixed__
from GPy.util.misc import param_to_array from GPy.util.misc import param_to_array
from GPy.util.caching import Cache_this from GPy.util.caching import Cache_this
@ -16,13 +16,13 @@ class VariationalPrior(Parameterized):
super(VariationalPrior, self).__init__(name=name, **kw) super(VariationalPrior, self).__init__(name=name, **kw)
def KL_divergence(self, variational_posterior): def KL_divergence(self, variational_posterior):
raise NotImplementedError, "override this for variational inference of latent space" raise NotImplementedError("override this for variational inference of latent space")
def update_gradients_KL(self, variational_posterior): def update_gradients_KL(self, variational_posterior):
""" """
updates the gradients for mean and variance **in place** updates the gradients for mean and variance **in place**
""" """
raise NotImplementedError, "override this for variational inference of latent space" raise NotImplementedError("override this for variational inference of latent space")
class NormalPrior(VariationalPrior): class NormalPrior(VariationalPrior):
def KL_divergence(self, variational_posterior): def KL_divergence(self, variational_posterior):
@ -50,31 +50,29 @@ class SpikeAndSlabPrior(VariationalPrior):
def KL_divergence(self, variational_posterior): def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities() gamma = variational_posterior.gamma.values
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
if len(self.pi.shape)==2: if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx] pi = self.pi[idx]
else: else:
pi = self.pi pi = self.pi
var_mean = np.square(mu)/self.variance var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S)) var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum() var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2. return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
def update_gradients_KL(self, variational_posterior): def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities() gamma = variational_posterior.gamma.values
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
if len(self.pi.shape)==2: if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1]) idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx] pi = self.pi[idx]
else: else:
pi = self.pi pi = self.pi
variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1 variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= gamma*mu/self.variance mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2. S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi: if self.learnPi:
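
The var_gamma term above is the KL divergence between the posterior Bernoulli(gamma) and the prior Bernoulli(pi), summed over entries, and the Logistic(0., 1.) constraint now placed on binary_prob keeps gamma strictly inside (0, 1) so the logarithms stay finite. A toy numeric check of that term (numbers made up, not GPy API):

    import numpy as np

    gamma = np.array([0.9, 0.2, 0.5])   # posterior inclusion probabilities
    pi = np.array([0.5, 0.5, 0.5])      # prior inclusion probability

    var_gamma = (gamma * np.log(gamma / pi)).sum() + ((1 - gamma) * np.log((1 - gamma) / (1 - pi))).sum()
    print(var_gamma)   # >= 0, and exactly 0 when gamma == pi
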
@ -141,7 +139,7 @@ class NormalPosterior(VariationalPosterior):
holds the means and variances for a factorizing multivariate normal distribution holds the means and variances for a factorizing multivariate normal distribution
''' '''
def plot(self, *args): def plot(self, *args, **kwargs):
""" """
Plot latent space X in 1D: Plot latent space X in 1D:
@ -150,8 +148,7 @@ class NormalPosterior(VariationalPosterior):
import sys import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import variational_plots from ...plotting.matplot_dep import variational_plots
import matplotlib return variational_plots.plot(self, *args, **kwargs)
return variational_plots.plot(self,*args)
class SpikeAndSlabPosterior(VariationalPosterior): class SpikeAndSlabPosterior(VariationalPosterior):
''' '''
@ -162,25 +159,9 @@ class SpikeAndSlabPosterior(VariationalPosterior):
binary_prob : the probability of the distribution on the slab part. binary_prob : the probability of the distribution on the slab part.
""" """
super(SpikeAndSlabPosterior, self).__init__(means, variances, name) super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob) self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
self.link_parameter(self.gamma) self.link_parameter(self.gamma)
@Cache_this(limit=5)
def gamma_probabilities(self):
prob = np.zeros_like(param_to_array(self.gamma))
prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
prob1 = -np.zeros_like(param_to_array(self.gamma))
prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
return prob, prob1
@Cache_this(limit=5)
def gamma_log_prob(self):
loggamma = param_to_array(self.gamma).copy()
loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
loggamma1 = -param_to_array(self.gamma).copy()
loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40]))
return loggamma,loggamma1
def set_gradients(self, grad): def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad

View file

@ -2,19 +2,15 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from gp import GP from .gp import GP
from parameterization.param import Param from .parameterization.param import Param
from ..inference.latent_function_inference import var_dtc from ..inference.latent_function_inference import var_dtc
from .. import likelihoods from .. import likelihoods
from parameterization.variational import VariationalPosterior, NormalPosterior from .parameterization.variational import VariationalPosterior, NormalPosterior
from ..util.linalg import mdot from ..util.linalg import mdot
import logging import logging
from GPy.inference.latent_function_inference.posterior import Posterior import itertools
from GPy.inference.optimization.stochastics import SparseGPStochastics,\
SparseGPMissing
#no stochastics.py file added! from GPy.inference.optimization.stochastics import SparseGPStochastics,\
#SparseGPMissing
logger = logging.getLogger("sparse gp") logger = logging.getLogger("sparse gp")
class SparseGP(GP): class SparseGP(GP):
@ -25,6 +21,10 @@ class SparseGP(GP):
(Gaussian likelihoods) as well as non-conjugate sparse methods based on (Gaussian likelihoods) as well as non-conjugate sparse methods based on
these. these.
    This class does not handle missing data; the missing-data implementation involves
    some optimization routine decisions that would be inefficient here.
    For missing data, see the SparseGP implementation in :py:class:`~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch`.
:param X: inputs :param X: inputs
:type X: np.ndarray (num_data x input_dim) :type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data :param likelihood: a likelihood instance, containing the observed data
@ -40,7 +40,7 @@ class SparseGP(GP):
""" """
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False): name='sparse gp', Y_metadata=None, normalizer=False):
#pick a sensible inference method #pick a sensible inference method
if inference_method is None: if inference_method is None:
@ -48,13 +48,13 @@ class SparseGP(GP):
inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1]) inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
else: else:
#inference_method = ?? #inference_method = ??
raise NotImplementedError, "what to do what to do?" raise NotImplementedError("what to do what to do?")
print "defaulting to ", inference_method, "for latent function inference" print("defaulting to ", inference_method, "for latent function inference")
self.Z = Param('inducing inputs', Z) self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0] self.num_inducing = Z.shape[0]
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer) GP.__init__(self, X, Y, kernel, likelihood, mean_function, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
logger.info("Adding Z as parameter") logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0) self.link_parameter(self.Z, index=0)
@ -63,6 +63,14 @@ class SparseGP(GP):
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
def set_Z(self, Z, trigger_update=True):
if trigger_update: self.update_model(False)
self.unlink_parameter(self.Z)
self.Z = Param('inducing inputs',Z)
self.link_parameter(self.Z, index=0)
if trigger_update: self.update_model(True)
if trigger_update: self._trigger_params_changed()
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
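
The new set_Z helper above swaps in a fresh inducing-input Param while model recomputation is paused and then triggers a single update. A rough usage sketch through the public sparse regression wrapper, assuming it inherits from the SparseGP shown here (data and sizes made up):

    import numpy as np
    import GPy

    X = np.random.rand(100, 1)
    Y = np.sin(6 * X) + 0.05 * np.random.randn(100, 1)

    m = GPy.models.SparseGPRegression(X, Y, num_inducing=10)
    before = m.log_likelihood()

    new_Z = X[np.random.choice(X.shape[0], 10, replace=False)]
    m.set_Z(new_Z)                     # unlink the old Z, link the new one, recompute once
    print(before, m.log_likelihood())
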
@ -111,7 +119,7 @@ class SparseGP(GP):
For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD). return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).
    This is for both with and without missing data. This is for both with and without missing data. For missing data, see the SparseGP implementation in :py:class:`~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch`.
""" """
if kern is None: kern = self.kern if kern is None: kern = self.kern
@ -124,15 +132,26 @@ class SparseGP(GP):
if self.posterior.woodbury_inv.ndim == 2: if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3: elif self.posterior.woodbury_inv.ndim == 3:
var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2) var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var var = var
else: else:
Kxx = kern.Kdiag(Xnew) Kxx = kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
else: else:
psi0_star = self.kern.psi0(self.Z, Xnew) psi0_star = kern.psi0(self.Z, Xnew)
psi1_star = self.kern.psi1(self.Z, Xnew) psi1_star = kern.psi1(self.Z, Xnew)
#psi2_star = self.kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code. #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
la = self.posterior.woodbury_vector la = self.posterior.woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions? mu = np.dot(psi1_star, la) # TODO: dimensions?
@ -144,7 +163,7 @@ class SparseGP(GP):
for i in range(Xnew.shape[0]): for i in range(Xnew.shape[0]):
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]] _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
psi2_star = self.kern.psi2(self.Z, NormalPosterior(_mu, _var)) psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]])) tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
var_ = mdot(la.T, tmp, la) var_ = mdot(la.T, tmp, la)
@ -158,4 +177,5 @@ class SparseGP(GP):
var[i] = var_ var[i] = var_
else: else:
var[i] = np.diag(var_)+p0-t2 var[i] = np.diag(var_)+p0-t2
return mu, var return mu, var

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from sparse_gp import SparseGP from .sparse_gp import SparseGP
from numpy.linalg.linalg import LinAlgError from numpy.linalg.linalg import LinAlgError
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
@ -56,7 +56,7 @@ class SparseGP_MPI(SparseGP):
self.N_range = (N_start, N_end) self.N_range = (N_start, N_end)
self.N_list = np.array(N_list) self.N_list = np.array(N_list)
self.Y_local = self.Y[N_start:N_end] self.Y_local = self.Y[N_start:N_end]
print 'MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range) print('MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range))
mpi_comm.Bcast(self.param_array, root=0) mpi_comm.Bcast(self.param_array, root=0)
self.update_model(True) self.update_model(True)

View file

@ -3,13 +3,13 @@
import numpy as np import numpy as np
from ..util import choleskies from ..util import choleskies
from sparse_gp import SparseGP from .sparse_gp import SparseGP
from parameterization.param import Param from .parameterization.param import Param
from ..inference.latent_function_inference import SVGP as svgp_inf from ..inference.latent_function_inference import SVGP as svgp_inf
class SVGP(SparseGP): class SVGP(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, name='SVGP', Y_metadata=None, batchsize=None): def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None, num_latent_functions=None):
""" """
Stochastic Variational GP. Stochastic Variational GP.
@ -38,33 +38,45 @@ class SVGP(SparseGP):
#create the SVI inference method #create the SVI inference method
inf_method = svgp_inf() inf_method = svgp_inf()
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method, SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method,
name=name, Y_metadata=Y_metadata, normalizer=False) name=name, Y_metadata=Y_metadata, normalizer=False)
self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1]))) #assume the number of latent functions is one per col of Y unless specified
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,Y.shape[1]))) if num_latent_functions is None:
num_latent_functions = Y.shape[1]
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,num_latent_functions)))
self.chol = Param('q_u_chol', chol) self.chol = Param('q_u_chol', chol)
self.link_parameter(self.chol) self.link_parameter(self.chol)
self.link_parameter(self.m) self.link_parameter(self.m)
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0])) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.mean_function, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
#update the kernel gradients #update the kernel gradients
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z) self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)
grad = self.kern.gradient.copy() grad = self.kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKmn'], self.Z, self.X) self.kern.update_gradients_full(self.grad_dict['dL_dKmn'], self.Z, self.X)
grad += self.kern.gradient grad += self.kern.gradient.copy()
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X) self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
self.kern.gradient += grad self.kern.gradient += grad
if not self.Z.is_fixed:# only compute these expensive gradients if we need them if not self.Z.is_fixed:# only compute these expensive gradients if we need them
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) + self.kern.gradients_X(self.grad_dict['dL_dKmn'], self.Z, self.X) self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) + self.kern.gradients_X(self.grad_dict['dL_dKmn'], self.Z, self.X)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
#update the variational parameter gradients: #update the variational parameter gradients:
self.m.gradient = self.grad_dict['dL_dm'] self.m.gradient = self.grad_dict['dL_dm']
self.chol.gradient = self.grad_dict['dL_dchol'] self.chol.gradient = self.grad_dict['dL_dchol']
if self.mean_function is not None:
self.mean_function.update_gradients(self.grad_dict['dL_dmfX'], self.X)
g = self.mean_function.gradient[:].copy()
self.mean_function.update_gradients(self.grad_dict['dL_dmfZ'], self.Z)
self.mean_function.gradient[:] += g
self.Z.gradient[:] += self.mean_function.gradients_X(self.grad_dict['dL_dmfZ'], self.Z)
def set_data(self, X, Y): def set_data(self, X, Y):
""" """
Set the data without calling parameters_changed to avoid wasted computation Set the data without calling parameters_changed to avoid wasted computation
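To make the new constructor arguments above concrete, here is a minimal usage sketch; the toy data and variable names are our own assumptions, only the constructor signature comes from the diff above, and in practice training would be driven by a stochastic optimizer that keeps feeding fresh minibatches (not shown here):

import numpy as np
import GPy

X = np.random.rand(100, 1)
Y = np.hstack([np.sin(6 * X), np.cos(6 * X)]) + 0.05 * np.random.randn(100, 2)
Z = np.random.rand(10, 1)  # inducing inputs

# one latent function per output column (the default when num_latent_functions is None),
# minibatches of 20 points drawn from the 100 training points
m = GPy.core.SVGP(X, Y, Z,
                  kernel=GPy.kern.RBF(1),
                  likelihood=GPy.likelihoods.Gaussian(),
                  batchsize=20)
print(m)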

View file

@ -223,7 +223,7 @@ class Symbolic_core():
def code_gradients_cacheable(self, function, variable): def code_gradients_cacheable(self, function, variable):
if variable not in self.cacheable: if variable not in self.cacheable:
raise RuntimeError, variable + ' must be a cacheable.' raise RuntimeError(variable + ' must be a cacheable.')
lcode = 'gradients_' + variable + ' = np.zeros_like(' + variable + ')\n' lcode = 'gradients_' + variable + ' = np.zeros_like(' + variable + ')\n'
lcode += 'self.update_cache(' + ', '.join(self.cacheable) + ')\n' lcode += 'self.update_cache(' + ', '.join(self.cacheable) + ')\n'
for i, theta in enumerate(self.variables[variable]): for i, theta in enumerate(self.variables[variable]):

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012-2014, Max Zwiessele. # Copyright (c) 2012-2014, Max Zwiessele.
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from __future__ import print_function
import numpy as np import numpy as np
import sys import sys
import time import time
@ -11,7 +11,7 @@ def exponents(fnow, current_grad):
return np.sign(exps) * np.log10(exps).astype(int) return np.sign(exps) * np.log10(exps).astype(int)
class VerboseOptimization(object): class VerboseOptimization(object):
def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True): def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True, clear_after_finish=False):
self.verbose = verbose self.verbose = verbose
if self.verbose: if self.verbose:
self.model = model self.model = model
@ -22,30 +22,31 @@ class VerboseOptimization(object):
self.opt_name = opt.opt_name self.opt_name = opt.opt_name
self.model.add_observer(self, self.print_status) self.model.add_observer(self, self.print_status)
self.status = 'running' self.status = 'running'
self.clear = clear_after_finish
self.update() self.update()
try: try:
from IPython.display import display from IPython.display import display
from IPython.html.widgets import FloatProgressWidget, HTMLWidget, ContainerWidget from IPython.html.widgets import IntProgress, HTML, Box, VBox, HBox, FlexBox
self.text = HTMLWidget() self.text = HTML(width='100%')
self.progress = FloatProgressWidget() self.progress = IntProgress(min=0, max=maxiters)
self.model_show = HTMLWidget() #self.progresstext = Text(width='100%', disabled=True, value='0/{}'.format(maxiters))
self.model_show = HTML()
self.ipython_notebook = ipython_notebook self.ipython_notebook = ipython_notebook
except: except:
# Not in Ipython notebook # Not in Ipython notebook
self.ipython_notebook = False self.ipython_notebook = False
if self.ipython_notebook: if self.ipython_notebook:
left_col = VBox(children=[self.progress, self.text], padding=2, width='40%')
right_col = Box(children=[self.model_show], padding=2, width='60%')
self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
display(self.hor_align)
try:
self.text.set_css('width', '100%') self.text.set_css('width', '100%')
#self.progress.set_css('width', '100%')
left_col = ContainerWidget(children = [self.progress, self.text])
right_col = ContainerWidget(children = [self.model_show])
hor_align = ContainerWidget(children = [left_col, right_col])
display(hor_align)
left_col.set_css({ left_col.set_css({
'padding': '2px', 'padding': '2px',
'width': "100%", 'width': "100%",
@ -55,22 +56,25 @@ class VerboseOptimization(object):
'padding': '2px', 'padding': '2px',
}) })
hor_align.set_css({ self.hor_align.set_css({
'width': "100%", 'width': "100%",
}) })
hor_align.remove_class('vbox') self.hor_align.remove_class('vbox')
hor_align.add_class('hbox') self.hor_align.add_class('hbox')
left_col.add_class("box-flex1") left_col.add_class("box-flex1")
right_col.add_class('box-flex0') right_col.add_class('box-flex0')
except:
pass
#self.text.add_class('box-flex2') #self.text.add_class('box-flex2')
#self.progress.add_class('box-flex1') #self.progress.add_class('box-flex1')
else: else:
self.exps = exponents(self.fnow, self.current_gradient) self.exps = exponents(self.fnow, self.current_gradient)
print 'Running {} Code:'.format(self.opt_name) print('Running {} Code:'.format(self.opt_name))
print ' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters) print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters))
def __enter__(self): def __enter__(self):
self.start = time.time() self.start = time.time()
@ -102,7 +106,8 @@ class VerboseOptimization(object):
html_body += "<td class='tg-right'>{}</td>".format(val) html_body += "<td class='tg-right'>{}</td>".format(val)
html_body += "</tr>" html_body += "</tr>"
self.text.value = html_begin + html_body + html_end self.text.value = html_begin + html_body + html_end
self.progress.value = 100*(self.iteration+1)/self.maxiters self.progress.value = (self.iteration+1)
#self.progresstext.value = '0/{}'.format((self.iteration+1))
self.model_show.value = self.model._repr_html_() self.model_show.value = self.model._repr_html_()
else: else:
n_exps = exponents(self.fnow, self.current_gradient) n_exps = exponents(self.fnow, self.current_gradient)
@ -111,11 +116,11 @@ class VerboseOptimization(object):
b = np.any(n_exps < self.exps) b = np.any(n_exps < self.exps)
if a or b: if a or b:
self.p_iter = self.iteration self.p_iter = self.iteration
print '' print('')
if b: if b:
self.exps = n_exps self.exps = n_exps
print '\r', print('\r', end=' ')
print '{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', print('{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush() sys.stdout.flush()
def print_status(self, me, which=None): def print_status(self, me, which=None):
@ -136,6 +141,13 @@ class VerboseOptimization(object):
def finish(self, opt): def finish(self, opt):
self.status = opt.status self.status = opt.status
if self.verbose and self.ipython_notebook:
if 'conv' in self.status.lower():
self.progress.bar_style = 'success'
elif self.iteration >= self.maxiters:
self.progress.bar_style = 'warning'
else:
self.progress.bar_style = 'danger'
def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
if self.verbose: if self.verbose:
@ -144,7 +156,9 @@ class VerboseOptimization(object):
self.print_out() self.print_out()
if not self.ipython_notebook: if not self.ipython_notebook:
print '' print()
print 'Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start) print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start))
print 'Optimization status: {0:.5g}'.format(self.status) print('Optimization status: {0}'.format(self.status))
print print()
elif self.clear:
self.hor_align.close()

View file

@ -25,3 +25,6 @@ MKL = False
[weave] [weave]
#if true, try to use weave, and fall back to numpy. if false, just use numpy. #if true, try to use weave, and fall back to numpy. if false, just use numpy.
working = True working = True
[cython]
working = True
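As an aside, a minimal sketch of how a flag like [cython] working could be read with the standard library; the file name below is hypothetical and GPy's own configuration loader may resolve paths and defaults differently:

try:
    from configparser import ConfigParser  # Python 3
except ImportError:
    from ConfigParser import ConfigParser  # Python 2

config = ConfigParser()
config.read('installation.cfg')  # hypothetical file name
use_cython = config.has_section('cython') and config.getboolean('cython', 'working')
print('cython enabled:', use_cython)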

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import classification from . import classification
import regression from . import regression
import dimensionality_reduction from . import dimensionality_reduction
import non_gaussian from . import non_gaussian

View file

@ -15,7 +15,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
""" """
try:import pods try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
data = pods.datasets.oil() data = pods.datasets.oil()
X = data['X'] X = data['X']
Xtest = data['Xtest'] Xtest = data['Xtest']
@ -52,7 +52,7 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
""" """
try:import pods try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
data = pods.datasets.toy_linear_1d_classification(seed=seed) data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
@ -75,7 +75,7 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
m.plot_f(ax=axes[0]) m.plot_f(ax=axes[0])
m.plot(ax=axes[1]) m.plot(ax=axes[1])
print m print(m)
return m return m
def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True): def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True):
@ -88,7 +88,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
""" """
try:import pods try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
data = pods.datasets.toy_linear_1d_classification(seed=seed) data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
@ -114,7 +114,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
m.plot_f(ax=axes[0]) m.plot_f(ax=axes[0])
m.plot(ax=axes[1]) m.plot(ax=axes[1])
print m print(m)
return m return m
def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True): def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True):
@ -127,7 +127,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
""" """
try:import pods try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
data = pods.datasets.toy_linear_1d_classification(seed=seed) data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
@ -147,7 +147,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
m.plot_f(ax=axes[0]) m.plot_f(ax=axes[0])
m.plot(ax=axes[1]) m.plot(ax=axes[1])
print m print(m)
return m return m
def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True): def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
@ -160,7 +160,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
""" """
try:import pods try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
data = pods.datasets.toy_linear_1d_classification(seed=seed) data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
@ -177,7 +177,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
# Parameters optimization: # Parameters optimization:
for _ in range(5): for _ in range(5):
m.optimize(max_iters=int(max_iters/5)) m.optimize(max_iters=int(max_iters/5))
print m print(m)
# Plot # Plot
if plot: if plot:
@ -186,7 +186,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
m.plot_f(ax=axes[0]) m.plot_f(ax=axes[0])
m.plot(ax=axes[1]) m.plot(ax=axes[1])
print m print(m)
return m return m
def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True): def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):
@ -202,7 +202,7 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
:type kernel: a GPy kernel :type kernel: a GPy kernel
""" """
try:import pods try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets' except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
data = pods.datasets.crescent_data(seed=seed) data = pods.datasets.crescent_data(seed=seed)
Y = data['Y'] Y = data['Y']
Y[Y.flatten()==-1] = 0 Y[Y.flatten()==-1] = 0
@ -224,5 +224,5 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
if plot: if plot:
m.plot() m.plot()
print m print(m)
return m return m

View file

@ -335,7 +335,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
m.likelihood.variance = .1 m.likelihood.variance = .1
if optimize: if optimize:
print "Optimizing model:" print("Optimizing model:")
m.optimize('bfgs', messages=verbose, max_iters=max_iters, m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
@ -360,7 +360,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
m.likelihood.variance = .1 m.likelihood.variance = .1
if optimize: if optimize:
print "Optimizing model:" print("Optimizing model:")
m.optimize('scg', messages=verbose, max_iters=max_iters, m.optimize('scg', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
@ -390,7 +390,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
m.Yreal = Y m.Yreal = Y
if optimize: if optimize:
print "Optimizing model:" print("Optimizing model:")
m.optimize('bfgs', messages=verbose, max_iters=max_iters, m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
@ -414,7 +414,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
m['.*noise'] = [Y.var() / 40. for Y in Ylist] m['.*noise'] = [Y.var() / 40. for Y in Ylist]
if optimize: if optimize:
print "Optimizing Model:" print("Optimizing Model:")
m.optimize(messages=verbose, max_iters=8e3) m.optimize(messages=verbose, max_iters=8e3)
if plot: if plot:
m.X.plot("MRD Latent Space 1D") m.X.plot("MRD Latent Space 1D")
@ -442,7 +442,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
initx="random", initz='permute', **kw) initx="random", initz='permute', **kw)
if optimize: if optimize:
print "Optimizing Model:" print("Optimizing Model:")
m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1) m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
if plot: if plot:
m.X.plot("MRD Latent Space 1D") m.X.plot("MRD Latent Space 1D")
@ -607,7 +607,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
try: try:
if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10) if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10)
except KeyboardInterrupt: except KeyboardInterrupt:
print "Keyboard interrupt, continuing to plot and return" print("Keyboard interrupt, continuing to plot and return")
if plot: if plot:
fig, (latent_axes, sense_axes) = plt.subplots(1, 2) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
@ -658,7 +658,7 @@ def ssgplvm_simulation_linear():
def sample_X(Q, pi): def sample_X(Q, pi):
x = np.empty(Q) x = np.empty(Q)
dies = np.random.rand(Q) dies = np.random.rand(Q)
for q in xrange(Q): for q in range(Q):
if dies[q] < pi: if dies[q] < pi:
x[q] = np.random.randn() x[q] = np.random.randn()
else: else:
@ -668,7 +668,7 @@ def ssgplvm_simulation_linear():
Y = np.empty((N, D)) Y = np.empty((N, D))
X = np.empty((N, Q)) X = np.empty((N, Q))
# Generate data from random sampled weight matrices # Generate data from random sampled weight matrices
for n in xrange(N): for n in range(N):
X[n] = sample_X(Q, pi) X[n] = sample_X(Q, pi)
w = np.random.randn(D, Q) w = np.random.randn(D, Q)
Y[n] = np.dot(w, X[n]) Y[n] = np.dot(w, X[n])

View file

@ -37,7 +37,7 @@ def student_t_approx(optimize=True, plot=True):
#Add student t random noise to datapoints #Add student t random noise to datapoints
deg_free = 1 deg_free = 1
print "Real noise: ", real_std print("Real noise: ", real_std)
initial_var_guess = 0.5 initial_var_guess = 0.5
edited_real_sd = initial_var_guess edited_real_sd = initial_var_guess
@ -73,7 +73,7 @@ def student_t_approx(optimize=True, plot=True):
m4['.*t_scale2'].constrain_bounded(1e-6, 10.) m4['.*t_scale2'].constrain_bounded(1e-6, 10.)
m4['.*white'].constrain_fixed(1e-5) m4['.*white'].constrain_fixed(1e-5)
m4.randomize() m4.randomize()
print m4 print(m4)
debug=True debug=True
if debug: if debug:
m4.optimize(messages=1) m4.optimize(messages=1)
@ -81,18 +81,18 @@ def student_t_approx(optimize=True, plot=True):
pb.plot(m4.X, m4.inference_method.f_hat) pb.plot(m4.X, m4.inference_method.f_hat)
pb.plot(m4.X, m4.Y, 'rx') pb.plot(m4.X, m4.Y, 'rx')
m4.plot() m4.plot()
print m4 print(m4)
return m4 return m4
if optimize: if optimize:
optimizer='scg' optimizer='scg'
print "Clean Gaussian" print("Clean Gaussian")
m1.optimize(optimizer, messages=1) m1.optimize(optimizer, messages=1)
print "Corrupt Gaussian" print("Corrupt Gaussian")
m2.optimize(optimizer, messages=1) m2.optimize(optimizer, messages=1)
print "Clean student t" print("Clean student t")
m3.optimize(optimizer, messages=1) m3.optimize(optimizer, messages=1)
print "Corrupt student t" print("Corrupt student t")
m4.optimize(optimizer, messages=1) m4.optimize(optimizer, messages=1)
if plot: if plot:
@ -151,7 +151,7 @@ def boston_example(optimize=True, plot=True):
for n, (train, test) in enumerate(kf): for n, (train, test) in enumerate(kf):
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test] X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
print "Fold {}".format(n) print("Fold {}".format(n))
noise = 1e-1 #np.exp(-2) noise = 1e-1 #np.exp(-2)
rbf_len = 0.5 rbf_len = 0.5
@ -163,21 +163,21 @@ def boston_example(optimize=True, plot=True):
score_folds[0, n] = rmse(Y_test, np.mean(Y_train)) score_folds[0, n] = rmse(Y_test, np.mean(Y_train))
#Gaussian GP #Gaussian GP
print "Gauss GP" print("Gauss GP")
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
mgp.constrain_fixed('.*white', 1e-5) mgp.constrain_fixed('.*white', 1e-5)
mgp['.*len'] = rbf_len mgp['.*len'] = rbf_len
mgp['.*noise'] = noise mgp['.*noise'] = noise
print mgp print(mgp)
if optimize: if optimize:
mgp.optimize(optimizer=optimizer, messages=messages) mgp.optimize(optimizer=optimizer, messages=messages)
Y_test_pred = mgp.predict(X_test) Y_test_pred = mgp.predict(X_test)
score_folds[1, n] = rmse(Y_test, Y_test_pred[0]) score_folds[1, n] = rmse(Y_test, Y_test_pred[0])
pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test)) pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test))
print mgp print(mgp)
print pred_density print(pred_density)
print "Gaussian Laplace GP" print("Gaussian Laplace GP")
N, D = Y_train.shape N, D = Y_train.shape
g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D) g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution) g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
@ -186,18 +186,18 @@ def boston_example(optimize=True, plot=True):
mg.constrain_fixed('.*white', 1e-5) mg.constrain_fixed('.*white', 1e-5)
mg['rbf_len'] = rbf_len mg['rbf_len'] = rbf_len
mg['noise'] = noise mg['noise'] = noise
print mg print(mg)
if optimize: if optimize:
mg.optimize(optimizer=optimizer, messages=messages) mg.optimize(optimizer=optimizer, messages=messages)
Y_test_pred = mg.predict(X_test) Y_test_pred = mg.predict(X_test)
score_folds[2, n] = rmse(Y_test, Y_test_pred[0]) score_folds[2, n] = rmse(Y_test, Y_test_pred[0])
pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test)) pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test))
print pred_density print(pred_density)
print mg print(mg)
for stu_num, df in enumerate(degrees_freedoms): for stu_num, df in enumerate(degrees_freedoms):
#Student T #Student T
print "Student-T GP {}df".format(df) print("Student-T GP {}df".format(df))
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
@ -205,14 +205,14 @@ def boston_example(optimize=True, plot=True):
mstu_t.constrain_bounded('.*t_scale2', 0.0001, 1000) mstu_t.constrain_bounded('.*t_scale2', 0.0001, 1000)
mstu_t['rbf_len'] = rbf_len mstu_t['rbf_len'] = rbf_len
mstu_t['.*t_scale2'] = noise mstu_t['.*t_scale2'] = noise
print mstu_t print(mstu_t)
if optimize: if optimize:
mstu_t.optimize(optimizer=optimizer, messages=messages) mstu_t.optimize(optimizer=optimizer, messages=messages)
Y_test_pred = mstu_t.predict(X_test) Y_test_pred = mstu_t.predict(X_test)
score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0]) score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0])
pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test)) pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test))
print pred_density print(pred_density)
print mstu_t print(mstu_t)
if plot: if plot:
plt.figure() plt.figure()
@ -230,8 +230,8 @@ def boston_example(optimize=True, plot=True):
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x') plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
plt.title('Stu t {}df'.format(df)) plt.title('Stu t {}df'.format(df))
print "Average scores: {}".format(np.mean(score_folds, 1)) print("Average scores: {}".format(np.mean(score_folds, 1)))
print "Average pred density: {}".format(np.mean(pred_density, 1)) print("Average pred density: {}".format(np.mean(pred_density, 1)))
if plot: if plot:
#Plotting #Plotting

View file

@ -15,7 +15,7 @@ def olympic_marathon_men(optimize=True, plot=True):
"""Run a standard Gaussian process regression on the Olympic marathon data.""" """Run a standard Gaussian process regression on the Olympic marathon data."""
try:import pods try:import pods
except ImportError: except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets' print('pods unavailable, see https://github.com/sods/ods for example datasets')
return return
data = pods.datasets.olympic_marathon_men() data = pods.datasets.olympic_marathon_men()
@ -88,7 +88,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
""" """
try:import pods try:import pods
except ImportError: except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets' print('pods unavailable, see https://github.com/sods/ods for example datasets')
return return
data = pods.datasets.epomeo_gpx() data = pods.datasets.epomeo_gpx()
num_data_list = [] num_data_list = []
@ -135,7 +135,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
try:import pods try:import pods
except ImportError: except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets' print('pods unavailable, see https://github.com/sods/ods for example datasets')
return return
data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number) data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
# data['Y'] = data['Y'][0::2, :] # data['Y'] = data['Y'][0::2, :]
@ -219,7 +219,7 @@ def olympic_100m_men(optimize=True, plot=True):
"""Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" """Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
try:import pods try:import pods
except ImportError: except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets' print('pods unavailable, see https://github.com/sods/ods for example datasets')
return return
data = pods.datasets.olympic_100m_men() data = pods.datasets.olympic_100m_men()
@ -240,7 +240,7 @@ def toy_rbf_1d(optimize=True, plot=True):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
try:import pods try:import pods
except ImportError: except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets' print('pods unavailable, see https://github.com/sods/ods for example datasets')
return return
data = pods.datasets.toy_rbf_1d() data = pods.datasets.toy_rbf_1d()
@ -258,7 +258,7 @@ def toy_rbf_1d_50(optimize=True, plot=True):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
try:import pods try:import pods
except ImportError: except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets' print('pods unavailable, see https://github.com/sods/ods for example datasets')
return return
data = pods.datasets.toy_rbf_1d_50() data = pods.datasets.toy_rbf_1d_50()
@ -377,7 +377,7 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
"""Predict the location of a robot given wirelss signal strength readings.""" """Predict the location of a robot given wirelss signal strength readings."""
try:import pods try:import pods
except ImportError: except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets' print('pods unavailable, see https://github.com/sods/ods for example datasets')
return return
data = pods.datasets.robot_wireless() data = pods.datasets.robot_wireless()
@ -398,14 +398,14 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
sse = ((data['Xtest'] - Xpredict)**2).sum() sse = ((data['Xtest'] - Xpredict)**2).sum()
print('Sum of squares error on test data: ' + str(sse)) print(('Sum of squares error on test data: ' + str(sse)))
return m return m
def silhouette(max_iters=100, optimize=True, plot=True): def silhouette(max_iters=100, optimize=True, plot=True):
"""Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper.""" """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
try:import pods try:import pods
except ImportError: except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets' print('pods unavailable, see https://github.com/sods/ods for example datasets')
return return
data = pods.datasets.silhouette() data = pods.datasets.silhouette()
@ -416,7 +416,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
if optimize: if optimize:
m.optimize(messages=True, max_iters=max_iters) m.optimize(messages=True, max_iters=max_iters)
print m print(m)
return m return m
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False): def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False):
@ -468,7 +468,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
if plot: if plot:
m.plot() m.plot()
print m print(m)
return m return m
def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
@ -492,7 +492,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
if plot: if plot:
m.plot(ax=axes[0]) m.plot(ax=axes[0])
axes[0].set_title('no input uncertainty') axes[0].set_title('no input uncertainty')
print m print(m)
# the same Model with uncertainty # the same Model with uncertainty
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S) m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
@ -503,5 +503,50 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
axes[1].set_title('with input uncertainty') axes[1].set_title('with input uncertainty')
fig.canvas.draw() fig.canvas.draw()
print m print(m)
return m return m
def simple_mean_function(max_iters=100, optimize=True, plot=True):
"""
The simplest possible mean function. No parameters, just a simple Sinusoid.
"""
#create simple mean function
mf = GPy.core.Mapping(1,1)
mf.f = np.sin
mf.update_gradients = lambda a,b: None
X = np.linspace(0,10,50).reshape(-1,1)
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)
k =GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
if optimize:
m.optimize(max_iters=max_iters)
if plot:
m.plot(plot_limits=(-10,15))
return m
def parametric_mean_function(max_iters=100, optimize=True, plot=True):
"""
A linear mean function with parameters that we'll learn alongside the kernel
"""
#create a linear mean function; its parameters are learned alongside the kernel
X = np.linspace(0,10,50).reshape(-1,1)
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
mf = GPy.mappings.Linear(1,1)
k =GPy.kern.RBF(1)
lik = GPy.likelihoods.Gaussian()
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
if optimize:
m.optimize(max_iters=max_iters)
if plot:
m.plot()
return m

View file

@ -1,3 +1,3 @@
import latent_function_inference from . import latent_function_inference
import optimization from . import optimization
import mcmc from . import mcmc

View file

@ -61,15 +61,15 @@ class InferenceMethodList(LatentFunctionInference, list):
for inf in state: for inf in state:
self.append(inf) self.append(inf)
from exact_gaussian_inference import ExactGaussianInference from .exact_gaussian_inference import ExactGaussianInference
from laplace import Laplace from .laplace import Laplace,LaplaceBlock
from GPy.inference.latent_function_inference.var_dtc import VarDTC from GPy.inference.latent_function_inference.var_dtc import VarDTC
from expectation_propagation import EP from .expectation_propagation import EP
from expectation_propagation_dtc import EPDTC from .expectation_propagation_dtc import EPDTC
from dtc import DTC from .dtc import DTC
from fitc import FITC from .fitc import FITC
from var_dtc_parallel import VarDTC_minibatch from .var_dtc_parallel import VarDTC_minibatch
from svgp import SVGP from .svgp import SVGP
# class FullLatentFunctionData(object): # class FullLatentFunctionData(object):
# #

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012-2014, James Hensman # Copyright (c) 2012-2014, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior from .posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
import numpy as np import numpy as np
from . import LatentFunctionInference from . import LatentFunctionInference
@ -20,7 +20,8 @@ class DTC(LatentFunctionInference):
def __init__(self): def __init__(self):
self.const_jitter = 1e-6 self.const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
num_inducing, _ = Z.shape num_inducing, _ = Z.shape
@ -29,7 +30,7 @@ class DTC(LatentFunctionInference):
#make sure the noise is not hetero #make sure the noise is not hetero
beta = 1./likelihood.gaussian_variance(Y_metadata) beta = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size > 1: if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC" raise NotImplementedError("no hetero noise with this implementation of DTC")
Kmm = kern.K(Z) Kmm = kern.K(Z)
Knn = kern.Kdiag(X) Knn = kern.Kdiag(X)
@ -88,7 +89,8 @@ class vDTC(object):
def __init__(self): def __init__(self):
self.const_jitter = 1e-6 self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata): def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC." assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
num_inducing, _ = Z.shape num_inducing, _ = Z.shape
@ -97,7 +99,7 @@ class vDTC(object):
#make sure the noise is not hetero #make sure the noise is not hetero
beta = 1./likelihood.gaussian_variance(Y_metadata) beta = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size > 1: if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC" raise NotImplementedError("no hetero noise with this implementation of DTC")
Kmm = kern.K(Z) Kmm = kern.K(Z)
Knn = kern.Kdiag(X) Knn = kern.Kdiag(X)

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior from .posterior import Posterior
from ...util.linalg import pdinv, dpotrs, tdot from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag from ...util import diag
import numpy as np import numpy as np
@ -36,16 +36,23 @@ class ExactGaussianInference(LatentFunctionInference):
#print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
return Y return Y
def inference(self, kern, X, likelihood, Y, Y_metadata=None): def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
""" """
Returns a Posterior class containing essential quantities of the posterior Returns a Posterior class containing essential quantities of the posterior
""" """
YYT_factor = self.get_YYTfactor(Y)
if mean_function is None:
m = 0
else:
m = mean_function.f(X)
YYT_factor = self.get_YYTfactor(Y-m)
K = kern.K(X) K = kern.K(X)
Ky = K.copy() Ky = K.copy()
diag.add(Ky, likelihood.gaussian_variance(Y_metadata)) diag.add(Ky, likelihood.gaussian_variance(Y_metadata)+1e-8)
Wi, LW, LWi, W_logdet = pdinv(Ky) Wi, LW, LWi, W_logdet = pdinv(Ky)
alpha, _ = dpotrs(LW, YYT_factor, lower=1) alpha, _ = dpotrs(LW, YYT_factor, lower=1)
@ -56,4 +63,18 @@ class ExactGaussianInference(LatentFunctionInference):
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata) dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
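A brief note on why the gradient with respect to the mean is simply alpha: writing K_y = K + sigma^2 I (plus the small jitter added above) and m = mean_function.f(X), the log marginal is

\log p(y \mid X) = -\tfrac{1}{2}(y-m)^\top K_y^{-1}(y-m) - \tfrac{1}{2}\log|K_y| - \tfrac{N}{2}\log 2\pi ,

so \partial \log p(y \mid X) / \partial m = K_y^{-1}(y-m), which is exactly the alpha returned in the gradient dictionary as 'dL_dm'.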
def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None):
"""
Leave one out error as found in
"Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
Vehtari et al. 2014.
"""
g = posterior.woodbury_vector
c = posterior.woodbury_inv
c_diag = np.diag(c)[:, None]
neg_log_marginal_LOO = 0.5*np.log(2*np.pi) - 0.5*np.log(c_diag) + 0.5*(g**2)/c_diag
#following 'Predictive Approaches for Choosing Hyperparameters in Gaussian Processes',
#this quantity is the negative marginal LOO log density, hence the negation on return
return -neg_log_marginal_LOO
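For orientation, the closed form this method implements (our transcription of the Gaussian case discussed in Vehtari et al. 2014), with g = posterior.woodbury_vector and c_{ii} the diagonal of posterior.woodbury_inv:

\log p(y_i \mid y_{-i}) = -\tfrac{1}{2}\log 2\pi + \tfrac{1}{2}\log c_{ii} - \frac{g_i^2}{2\,c_{ii}} ,

i.e. the negation of the neg_log_marginal_LOO computed above.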

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs
from posterior import Posterior from .posterior import Posterior
from . import LatentFunctionInference from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
@ -33,15 +33,19 @@ class EP(LatentFunctionInference):
# TODO: update approximation in the end as well? Maybe even with a switch? # TODO: update approximation in the end as well? Maybe even with a switch?
pass pass
def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None): def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)" assert output_dim ==1, "ep in 1D only (for now!)"
K = kern.K(X) K = kern.K(X)
if self._ep_approximation is None: if self._ep_approximation is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata) mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else: else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))

View file

@ -6,7 +6,7 @@ from ...util import diag
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
from . import LatentFunctionInference from . import LatentFunctionInference
from posterior import Posterior from .posterior import Posterior
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class EPDTC(LatentFunctionInference): class EPDTC(LatentFunctionInference):
@ -64,7 +64,8 @@ class EPDTC(LatentFunctionInference):
self.old_mutilde, self.old_vtilde = None, None self.old_mutilde, self.old_vtilde = None, None
self._ep_approximation = None self._ep_approximation = None
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)" assert output_dim ==1, "ep in 1D only (for now!)"
@ -179,7 +180,7 @@ class EPDTC(LatentFunctionInference):
if VVT_factor.shape[1] == Y.shape[1]: if VVT_factor.shape[1] == Y.shape[1]:
woodbury_vector = Cpsi1Vf # == Cpsi1V woodbury_vector = Cpsi1Vf # == Cpsi1V
else: else:
print 'foobar' print('foobar')
psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1) tmp, _ = dpotrs(LB, tmp, lower=1)
@ -314,7 +315,7 @@ def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf,
dL_dR = None dL_dR = None
elif het_noise: elif het_noise:
if uncertain_inputs: if uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" raise NotImplementedError("heteroscedatic derivates with uncertain inputs not implemented")
else: else:
#from ...util.linalg import chol_inv #from ...util.linalg import chol_inv
#LBi = chol_inv(LB) #LBi = chol_inv(LB)

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012, James Hensman # Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior from .posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
from ...util import diag from ...util import diag
import numpy as np import numpy as np
@ -18,7 +18,8 @@ class FITC(LatentFunctionInference):
""" """
const_jitter = 1e-6 const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None): def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
num_inducing, _ = Z.shape num_inducing, _ = Z.shape
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
@ -26,7 +27,7 @@ class FITC(LatentFunctionInference):
#make sure the noise is not hetero #make sure the noise is not hetero
sigma_n = likelihood.gaussian_variance(Y_metadata) sigma_n = likelihood.gaussian_variance(Y_metadata)
if sigma_n.size >1: if sigma_n.size >1:
raise NotImplementedError, "no hetero noise with this implementation of FITC" raise NotImplementedError("no hetero noise with this implementation of FITC")
Kmm = kern.K(Z) Kmm = kern.K(Z)
Knn = kern.Kdiag(X) Knn = kern.Kdiag(X)

View file

@ -12,13 +12,14 @@
import numpy as np import numpy as np
from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv
from posterior import Posterior from .posterior import Posterior
import warnings import warnings
def warning_on_one_line(message, category, filename, lineno, file=None, line=None): def warning_on_one_line(message, category, filename, lineno, file=None, line=None):
return ' %s:%s: %s:%s\n' % (filename, lineno, category.__name__, message) return ' %s:%s: %s:%s\n' % (filename, lineno, category.__name__, message)
warnings.formatwarning = warning_on_one_line warnings.formatwarning = warning_on_one_line
from scipy import optimize from scipy import optimize
from . import LatentFunctionInference from . import LatentFunctionInference
from scipy.integrate import quad
class Laplace(LatentFunctionInference): class Laplace(LatentFunctionInference):
@ -39,10 +40,90 @@ class Laplace(LatentFunctionInference):
self.first_run = True self.first_run = True
self._previous_Ki_fhat = None self._previous_Ki_fhat = None
def inference(self, kern, X, likelihood, Y, Y_metadata=None): def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None, f_hat=None, W=None, Ki_W_i=None):
"""
Leave one out log predictive density as found in
"Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
Vehtari et al. 2014.
"""
Ki_f_init = np.zeros_like(Y)
if K is None:
K = kern.K(X)
if f_hat is None:
f_hat, _ = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
if W is None:
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
if Ki_W_i is None:
_, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
logpdf_dfhat = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
if W.shape[1] == 1:
W = np.diagflat(W)
#Eq 14, and 16
var_site = 1./np.diag(W)[:, None]
mu_site = f_hat + var_site*logpdf_dfhat
prec_site = 1./var_site
#Eq 19
marginal_cov = Ki_W_i
marginal_mu = marginal_cov.dot(np.diagflat(prec_site)).dot(mu_site)
marginal_var = np.diag(marginal_cov)[:, None]
#Eq 30 with using site parameters instead of Gaussian site parameters
#(var_site instead of sigma^{2} )
posterior_cav_var = 1./(1./marginal_var - 1./var_site)
posterior_cav_mean = posterior_cav_var*((1./marginal_var)*marginal_mu - (1./var_site)*Y)
flat_y = Y.flatten()
flat_mu = posterior_cav_mean.flatten()
flat_var = posterior_cav_var.flatten()
if Y_metadata is not None:
#Need to zip individual elements of Y_metadata as well
Y_metadata_flat = {}
if Y_metadata is not None:
for key, val in Y_metadata.items():
Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1, 1)
zipped_values = []
for i in range(Y.shape[0]):
y_m = {}
for key, val in Y_metadata_flat.items():
if np.isscalar(val) or val.shape[0] == 1:
y_m[key] = val
else:
#Won't broadcast yet
y_m[key] = val[i]
zipped_values.append((flat_y[i], flat_mu[i], flat_var[i], y_m))
else:
#Otherwise just pass along None's
zipped_values = zip(flat_y, flat_mu, flat_var, [None]*Y.shape[0])
def integral_generator(yi, mi, vi, yi_m):
def f(fi_star):
#More stable in the log space
p_fi = np.exp(likelihood.logpdf(fi_star, yi, yi_m)
- 0.5*np.log(2*np.pi*vi)
- 0.5*np.square(mi-fi_star)/vi)
return p_fi
return f
#Eq 30
p_ystar, _ = zip(*[quad(integral_generator(y, m, v, yi_m), -np.inf, np.inf)
for y, m, v, yi_m in zipped_values])
p_ystar = np.array(p_ystar).reshape(-1, 1)
return np.log(p_ystar)
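In other words, the quadrature above approximates the leave-one-out predictive density with a Gaussian cavity built from the Laplace fit; with v_i the marginal posterior variance (the diagonal of Ki_W_i) and \tilde\sigma_i^2 = 1/W_{ii} the site variance, the code evaluates, per data point,

p(y_i \mid y_{-i}) \approx \int p(y_i \mid f_i)\, \mathcal{N}\!\bigl(f_i \mid \mu_{\setminus i}, \sigma^2_{\setminus i}\bigr)\, df_i ,
\qquad \frac{1}{\sigma^2_{\setminus i}} = \frac{1}{v_i} - \frac{1}{\tilde\sigma_i^2} ,

with the cavity mean assembled from the site and marginal means as in the lines above.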
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
""" """
Returns a Posterior class containing essential quantities of the posterior Returns a Posterior class containing essential quantities of the posterior
""" """
assert mean_function is None, "inference with a mean function not implemented"
# Compute K # Compute K
K = kern.K(X) K = kern.K(X)
@ -50,21 +131,25 @@ class Laplace(LatentFunctionInference):
#Find mode #Find mode
if self.bad_fhat or self.first_run: if self.bad_fhat or self.first_run:
Ki_f_init = np.zeros_like(Y) Ki_f_init = np.zeros_like(Y)
first_run = False self.first_run = False
else: else:
Ki_f_init = self._previous_Ki_fhat Ki_f_init = self._previous_Ki_fhat
Ki_f_init = np.zeros_like(Y)# FIXME: take this out
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata) f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat self.f_hat = f_hat
self.Ki_fhat = Ki_fhat #self.Ki_fhat = Ki_fhat
self.K = K.copy() #self.K = K.copy()
#Compute hessian and other variables at mode #Compute hessian and other variables at mode
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata) log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
self._previous_Ki_fhat = Ki_fhat.copy() self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL} return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None): def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
""" """
Rasmussen's numerically stable mode finding Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006 For nomenclature see Rasmussen & Williams 2006
@ -89,7 +174,12 @@ class Laplace(LatentFunctionInference):
#define the objective function (to be maximised) #define the objective function (to be maximised)
def obj(Ki_f, f): def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata)) ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
if np.isnan(ll):
return -np.inf
else:
return ll
difference = np.inf difference = np.inf
iteration = 0 iteration = 0
@ -104,7 +194,7 @@ class Laplace(LatentFunctionInference):
W_f = W*f W_f = W*f
b = W_f + grad # R+W p46 line 6. b = W_f + grad # R+W p46 line 6.
W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave) W12BiW12, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b)) W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet #Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
@ -121,7 +211,9 @@ class Laplace(LatentFunctionInference):
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12) step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
Ki_f_new = Ki_f + step*dKi_f Ki_f_new = Ki_f + step*dKi_f
f_new = np.dot(K, Ki_f_new) f_new = np.dot(K, Ki_f_new)
#print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
if obj(Ki_f_new, f_new) < obj(Ki_f, f):
raise ValueError("Shouldn't happen, brent optimization failing")
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f)) difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
Ki_f = Ki_f_new Ki_f = Ki_f_new
f = f_new f = f_new
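The loop above is Rasmussen & Williams' Algorithm 3.1 with an added line search: writing a = K^{-1} f (the Ki_f of the code), W = -\nabla\nabla \log p(y \mid f) and B = I + W^{1/2} K W^{1/2}, each iteration proposes

b = W f + \nabla_f \log p(y \mid f), \qquad a_{\mathrm{new}} = b - W^{1/2} B^{-1} W^{1/2} K b, \qquad f_{\mathrm{new}} = K\, a_{\mathrm{new}} ,

and Brent's method then picks a step size along a_{\mathrm{new}} - a rather than always taking the full Newton step.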
@ -152,14 +244,10 @@ class Laplace(LatentFunctionInference):
if np.any(np.isnan(W)): if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN') raise ValueError('One or more element(s) of W is NaN')
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave) K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
#compute vital matrices
C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C)
#compute the log marginal #compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - np.sum(np.log(np.diag(L))) log_marginal = -0.5*np.sum(np.dot(Ki_f.T, f_hat)) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*logdet_I_KW
# Compute matrices for derivatives # Compute matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
@ -196,23 +284,23 @@ class Laplace(LatentFunctionInference):
dL_dthetaL = np.zeros(num_params) dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params): for thetaL_i in range(num_params):
#Explicit #Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i]) dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i,:, :])
# The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL # The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
+ 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten()) + 0.5*np.sum(np.diag(Ki_W_i)*np.squeeze(dlik_hess_dthetaL[thetaL_i, :, :]))
) )
#Implicit #Implicit
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i]) dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[thetaL_i, :, :])
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i]) #dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[thetaL_i, :, :])
dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL) dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp dL_dthetaL[thetaL_i] = np.sum(dL_dthetaL_exp + dL_dthetaL_imp)
else: else:
dL_dthetaL = np.zeros(likelihood.size) dL_dthetaL = np.zeros(likelihood.size)
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
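For reference, the approximate log marginal assembled above (our transcription of the standard Laplace objective):

\log Z_{\mathrm{Laplace}} = -\tfrac{1}{2}\,\hat f^{\top} K^{-1} \hat f + \sum_i \log p(y_i \mid \hat f_i) - \tfrac{1}{2}\log\bigl|I + K W\bigr| ,

with W evaluated at the mode \hat f, matching the log_marginal expression a few lines up.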
def _compute_B_statistics(self, K, W, log_concave): def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
""" """
Rasmussen suggests the use of a numerically stable positive definite matrix B Rasmussen suggests the use of a numerically stable positive definite matrix B
which has positive diagonal elements and can be easily inverted which has positive diagonal elements and can be easily inverted
@ -225,7 +313,7 @@ class Laplace(LatentFunctionInference):
""" """
if not log_concave: if not log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-6)) #print "Under 1e-10: {}".format(np.sum(W < 1e-6))
W[W<1e-6] = 1e-6 W = np.clip(W, 1e-6, 1e+30)
# NOTE: when setting a parameter inside parameters_changed it will always lead to closed update loops!!! # NOTE: when setting a parameter inside parameters_changed it will always lead to closed update loops!!!
#W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur #W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
# If the likelihood is non-log-concave, we want to say that there is a negative variance # If the likelihood is non-log-concave, we want to say that there is a negative variance
@ -247,5 +335,160 @@ class Laplace(LatentFunctionInference):
#K_Wi_i_2 , _= dpotri(L2) #K_Wi_i_2 , _= dpotri(L2)
#symmetrify(K_Wi_i_2) #symmetrify(K_Wi_i_2)
return K_Wi_i, L, LiW12 #compute vital matrices
C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C)
I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
return K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i
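A minimal illustrative sketch (hypothetical helper name, diagonal-W case only) of the stable quantities returned above, following Rasmussen & Williams: with B = I + W^{1/2} K W^{1/2}, one has (K + W^{-1})^{-1} = W^{1/2} B^{-1} W^{1/2} and log|I + KW| = log|B|.

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def compute_B_statistics_sketch(K, w):
    # w: the diagonal of W as a 1-D array of length N
    sw = np.sqrt(w)
    B = np.eye(K.shape[0]) + sw[:, None] * K * sw[None, :]   # I + W^0.5 K W^0.5
    c, low = cho_factor(B, lower=True)
    # (K + W^-1)^-1 = W^0.5 B^-1 W^0.5
    K_Wi_i = sw[:, None] * cho_solve((c, low), np.diag(sw))
    logdet_I_KW = 2. * np.sum(np.log(np.diag(c)))            # log|B| = log|I + KW|
    return K_Wi_i, logdet_I_KW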
class LaplaceBlock(Laplace):
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
Ki_f = Ki_f_init.copy()
f = np.dot(K, Ki_f)
#define the objective function (to be maximised)
def obj(Ki_f, f):
ll = -0.5*np.dot(Ki_f.T, f) + np.sum(likelihood.logpdf_sum(f, Y, Y_metadata=Y_metadata))
if np.isnan(ll):
return -np.inf
else:
return ll
difference = np.inf
iteration = 0
I = np.eye(K.shape[0])
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
W_f = np.dot(W, f)
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
b = W_f + grad # R+W p46 line 6.
K_Wi_i, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
#full Newton step: a = (I - (K + W^-1)^-1 * K) * b
full_step_Ki_f = np.dot(I - np.dot(K_Wi_i, K), b)
dKi_f = full_step_Ki_f - Ki_f
#define an objective for the line search (minimize this one)
def inner_obj(step_size):
Ki_f_trial = Ki_f + step_size*dKi_f
f_trial = np.dot(K, Ki_f_trial)
return -obj(Ki_f_trial, f_trial)
#use scipy for the line search, the compute new values of f, Ki_f
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
Ki_f_new = Ki_f + step*dKi_f
f_new = np.dot(K, Ki_f_new)
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
Ki_f = Ki_f_new
f = f_new
iteration += 1
#Warn of bad fits
if difference > self._mode_finding_tolerance:
if not self.bad_fhat:
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
self._previous_Ki_fhat = np.zeros_like(Y)
self.bad_fhat = True
elif self.bad_fhat:
self.bad_fhat = False
warnings.warn("f_hat now fine again")
if iteration > self._mode_finding_max_iter:
warnings.warn("didn't find the best")
return f, Ki_f
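A tiny sketch (hypothetical names) of the step-size selection pattern used in rasm_mode above: Brent's one-dimensional minimiser applied to the negated objective along the Newton direction.

from scipy import optimize

def line_search_newton_step(obj, a, da, tol=1e-4, maxiter=12):
    # obj is the objective to be maximised; da = full_newton_step - a
    inner_obj = lambda s: -obj(a + s * da)
    step = optimize.brent(inner_obj, tol=tol, maxiter=maxiter)
    return a + step * da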
def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
#At this point get the hessian matrix (or vector as W is diagonal)
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
K_Wi_i, log_B_det, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
#compute the log marginal
#FIXME: The determinant term should be scaled by output_dim*0.5, I think; gradients may no longer check
log_marginal = -0.5*np.dot(f_hat.T, Ki_f) + np.sum(likelihood.logpdf_sum(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*log_B_det
#Compute vital matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
#dL_dfhat = np.zeros((f_hat.shape[0]))
#for i in range(f_hat.shape[0]):
#dL_dfhat[i] = -0.5*np.trace(np.dot(Ki_W_i, dW_df[:,:,i]))
dL_dfhat = -0.5*np.einsum('ij,ijk->k', Ki_W_i, dW_df)
woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
####################
#compute dL_dK#
####################
if kern.size > 0 and not kern.is_fixed:
#Explicit
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
#Implicit
implicit_part = woodbury_vector.dot(dL_dfhat[None,:]).dot(I_KW_i)
#implicit_part = Ki_f.dot(dL_dfhat[None,:]).dot(I_KW_i)
dL_dK = explicit_part + implicit_part
else:
dL_dK = np.zeros_like(K)
####################
#compute dL_dthetaL#
####################
if likelihood.size > 0 and not likelihood.is_fixed:
raise NotImplementedError
else:
dL_dthetaL = np.zeros(likelihood.size)
#self.K_Wi_i = K_Wi_i
#self.Ki_W_i = Ki_W_i
#self.W = W
#self.K = K
#self.dL_dfhat = dL_dfhat
#self.explicit_part = explicit_part
#self.implicit_part = implicit_part
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
"""
Rasmussen suggests the use of a numerically stable positive definite matrix B
which has positive diagonal elements and can be easily inverted
:param K: Prior Covariance matrix evaluated at locations X
:type K: NxN matrix
:param W: Negative hessian at a point (diagonal matrix)
:type W: Vector of diagonal values of hessian (1xN)
:returns: (K_Wi_i, logdet_B, B_inv, Ki_W_i)
"""
#w = GPy.util.diag.view(W)
#W[:] = np.where(w<1e-6, 1e-6, w)
#B = I + KW
B = np.eye(K.shape[0]) + np.dot(K, W)
#Bi, L, Li, logdetB = pdinv(B)
Bi = np.linalg.inv(B)
#K_Wi_i = np.eye(K.shape[0]) - mdot(W, Bi, K)
K_Wi_i = np.dot(W, Bi)
#self.K_Wi_i_brute = np.linalg.inv(K + np.linalg.inv(W))
#self.B = B
#self.Bi = Bi
Ki_W_i = np.dot(Bi, K)
sign, logdetB = np.linalg.slogdet(B)
return K_Wi_i, sign*logdetB, Bi, Ki_W_i

View file

@ -15,7 +15,7 @@ class Posterior(object):
the function at any new point x_* by integrating over this posterior. the function at any new point x_* by integrating over this posterior.
""" """
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None): def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None, prior_mean=0):
""" """
woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
@ -52,7 +52,7 @@ class Posterior(object):
or ((mean is not None) and (cov is not None)): or ((mean is not None) and (cov is not None)):
pass # we have sufficient to compute the posterior pass # we have sufficient to compute the posterior
else: else:
raise ValueError, "insufficient information to compute the posterior" raise ValueError("insufficient information to compute the posterior")
self._K_chol = K_chol self._K_chol = K_chol
self._K = K self._K = K
@ -67,6 +67,7 @@ class Posterior(object):
#option 2: #option 2:
self._mean = mean self._mean = mean
self._covariance = cov self._covariance = cov
self._prior_mean = prior_mean
#compute this lazily #compute this lazily
self._precision = None self._precision = None
@ -107,7 +108,7 @@ class Posterior(object):
if self._precision is None: if self._precision is None:
cov = np.atleast_3d(self.covariance) cov = np.atleast_3d(self.covariance)
self._precision = np.zeros(cov.shape) # if one covariance per dimension self._precision = np.zeros(cov.shape) # if one covariance per dimension
for p in xrange(cov.shape[-1]): for p in range(cov.shape[-1]):
self._precision[:,:,p] = pdinv(cov[:,:,p])[0] self._precision[:,:,p] = pdinv(cov[:,:,p])[0]
return self._precision return self._precision
@ -125,7 +126,7 @@ class Posterior(object):
if self._woodbury_inv is not None: if self._woodbury_inv is not None:
winv = np.atleast_3d(self._woodbury_inv) winv = np.atleast_3d(self._woodbury_inv)
self._woodbury_chol = np.zeros(winv.shape) self._woodbury_chol = np.zeros(winv.shape)
for p in xrange(winv.shape[-1]): for p in range(winv.shape[-1]):
self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2] self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2]
#Li = jitchol(self._woodbury_inv) #Li = jitchol(self._woodbury_inv)
#self._woodbury_chol, _ = dtrtri(Li) #self._woodbury_chol, _ = dtrtri(Li)
@ -134,13 +135,13 @@ class Posterior(object):
#self._woodbury_chol = jitchol(W) #self._woodbury_chol = jitchol(W)
#try computing woodbury chol from cov #try computing woodbury chol from cov
elif self._covariance is not None: elif self._covariance is not None:
raise NotImplementedError, "TODO: check code here" raise NotImplementedError("TODO: check code here")
B = self._K - self._covariance B = self._K - self._covariance
tmp, _ = dpotrs(self.K_chol, B) tmp, _ = dpotrs(self.K_chol, B)
self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T) self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv) _, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
else: else:
raise ValueError, "insufficient information to compute posterior" raise ValueError("insufficient information to compute posterior")
return self._woodbury_chol return self._woodbury_chol
@property @property
@ -160,7 +161,7 @@ class Posterior(object):
elif self._covariance is not None: elif self._covariance is not None:
B = np.atleast_3d(self._K) - np.atleast_3d(self._covariance) B = np.atleast_3d(self._K) - np.atleast_3d(self._covariance)
self._woodbury_inv = np.empty_like(B) self._woodbury_inv = np.empty_like(B)
for i in xrange(B.shape[-1]): for i in range(B.shape[-1]):
tmp, _ = dpotrs(self.K_chol, B[:,:,i]) tmp, _ = dpotrs(self.K_chol, B[:,:,i])
self._woodbury_inv[:,:,i], _ = dpotrs(self.K_chol, tmp.T) self._woodbury_inv[:,:,i], _ = dpotrs(self.K_chol, tmp.T)
return self._woodbury_inv return self._woodbury_inv
@ -175,7 +176,7 @@ class Posterior(object):
$$ $$
""" """
if self._woodbury_vector is None: if self._woodbury_vector is None:
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean) self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean - self._prior_mean)
return self._woodbury_vector return self._woodbury_vector
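A small sketch (hypothetical helper, assuming the usual GP predictive identities) of how the woodbury quantities stored here are typically consumed at prediction time, with the prior mean handled as in the change above: mean_* = K_{*x} M + m(X_*) and var_* = K_{**} - K_{*x} woodbury_inv K_{x*}.

import numpy as np

def predict_from_woodbury(Kxs, Kss_diag, woodbury_vector, woodbury_inv, prior_mean_star=0.):
    # Kxs: (N_train, N_star) cross-covariance; Kss_diag: (N_star,) prior variances
    mu = Kxs.T.dot(woodbury_vector) + prior_mean_star
    var = Kss_diag[:, None] - np.sum(Kxs * woodbury_inv.dot(Kxs), axis=0)[:, None]
    return mu, var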
@property @property

View file

@ -2,17 +2,22 @@ from . import LatentFunctionInference
from ...util import linalg from ...util import linalg
from ...util import choleskies from ...util import choleskies
import numpy as np import numpy as np
from posterior import Posterior from .posterior import Posterior
class SVGP(LatentFunctionInference): class SVGP(LatentFunctionInference):
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0): def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
num_inducing = Z.shape[0]
num_data, num_outputs = Y.shape num_data, _ = Y.shape
num_inducing, num_outputs = q_u_mean.shape
#expand cholesky representation #expand cholesky representation
L = choleskies.flat_to_triang(q_u_chol) L = choleskies.flat_to_triang(q_u_chol)
S = np.einsum('ijk,ljk->ilk', L, L) #L.dot(L.T)
S = np.empty((num_outputs, num_inducing, num_inducing))
[np.dot(L[:,:,i], L[:,:,i].T, S[i,:,:]) for i in range(num_outputs)]
S = S.swapaxes(0,2)
#Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1) #Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
Si = choleskies.multiple_dpotri(L) Si = choleskies.multiple_dpotri(L)
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])]) logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
@ -22,6 +27,15 @@ class SVGP(LatentFunctionInference):
#S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S)) #S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
#Si, Lnew, _,_ = linalg.pdinv(S) #Si, Lnew, _,_ = linalg.pdinv(S)
#compute mean function stuff
if mean_function is not None:
prior_mean_u = mean_function.f(Z)
prior_mean_f = mean_function.f(X)
else:
prior_mean_u = np.zeros((num_inducing, num_outputs))
prior_mean_f = np.zeros((num_data, num_outputs))
#compute kernel related stuff #compute kernel related stuff
Kmm = kern.K(Z) Kmm = kern.K(Z)
Knm = kern.K(X, Z) Knm = kern.K(X, Z)
@ -30,38 +44,64 @@ class SVGP(LatentFunctionInference):
#compute the marginal means and variances of q(f) #compute the marginal means and variances of q(f)
A = np.dot(Knm, Kmmi) A = np.dot(Knm, Kmmi)
mu = np.dot(A, q_u_mean) mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jkl->ikl', A, S),1) #v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jlk->ilk', A, S),1)
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S),1)
#compute the KL term #compute the KL term
Kmmim = np.dot(Kmmi, q_u_mean) Kmmim = np.dot(Kmmi, q_u_mean)
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0) KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[:,:,None]*S,0).sum(0) + 0.5*np.sum(q_u_mean*Kmmim,0)
KL = KLs.sum() KL = KLs.sum()
dKL_dm = Kmmim #gradient of the KL term (assuming zero mean function)
dKL_dm = Kmmim.copy()
dKL_dS = 0.5*(Kmmi[:,:,None] - Si) dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T) dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
if mean_function is not None:
#adjust KL term for mean function
Kmmi_mfZ = np.dot(Kmmi, prior_mean_u)
KL += -np.sum(q_u_mean*Kmmi_mfZ)
KL += 0.5*np.sum(Kmmi_mfZ*prior_mean_u)
#adjust gradient for mean function
dKL_dm -= Kmmi_mfZ
dKL_dKmm += Kmmim.dot(Kmmi_mfZ.T)
dKL_dKmm -= 0.5*Kmmi_mfZ.dot(Kmmi_mfZ.T)
#compute gradients for mean_function
dKL_dmfZ = Kmmi_mfZ - Kmmim
#quadrature for the likelihood #quadrature for the likelihood
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata) F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata)
#rescale the F term if working on a batch #rescale the F term if working on a batch
F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
if dF_dthetaL is not None:
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale
#derivatives of expected likelihood #derivatives of expected likelihood, assuming zero mean function
Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
Admu = A.T.dot(dF_dmu) Admu = A.T.dot(dF_dmu)
#AdvA = np.einsum('ijk,jl->ilk', Adv, A)
#AdvA = np.dot(A.T, Adv).swapaxes(0,1)
AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)]) AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi) #tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
tmp = linalg.ijk_jlk_to_il(AdvA, S).dot(Kmmi)
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug? dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None]) #tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T) tmp = 2.*(linalg.ij_jlk_to_ilk(Kmmi, S) - np.eye(num_inducing)[:,:,None])
#dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
dF_dKmn = linalg.ijk_jlk_to_il(tmp, Adv) + Kmmim.dot(dF_dmu.T)
dF_dm = Admu dF_dm = Admu
dF_dS = AdvA dF_dS = AdvA
#adjust gradient to account for mean function
if mean_function is not None:
dF_dmfX = dF_dmu.copy()
dF_dmfZ = -Admu
dF_dKmn -= np.dot(Kmmi_mfZ, dF_dmu.T)
dF_dKmm += Admu.dot(Kmmi_mfZ.T)
#sum (gradients of) expected likelihood and KL part #sum (gradients of) expected likelihood and KL part
log_marginal = F.sum() - KL log_marginal = F.sum() - KL
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
@ -69,4 +109,8 @@ class SVGP(LatentFunctionInference):
dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)]) dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
dL_dchol = choleskies.triang_to_flat(dL_dchol) dL_dchol = choleskies.triang_to_flat(dL_dchol)
return Posterior(mean=q_u_mean, cov=S, K=Kmm), log_marginal, {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv, 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL} grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
if mean_function is not None:
grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
grad_dict['dL_dmfX'] = dF_dmfX
return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
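A simplified sketch (hypothetical function, dense linear algebra, no caching) of the q(f) marginals computed in inference above when a mean function m(.) is supplied: A = Knm Kmm^{-1}, mu = m(X) + A (q_u_mean - m(Z)), and the per-output variances are diag(Knn) - sum(A*Knm, 1) + diag(A S_k A^T).

import numpy as np

def svgp_marginals_sketch(Kmm, Knm, Knn_diag, q_u_mean, S, mX, mZ):
    # S has shape (num_inducing, num_inducing, num_outputs), as in the code above
    A = np.linalg.solve(Kmm, Knm.T).T                      # Knm Kmm^{-1}
    mu = mX + A.dot(q_u_mean - mZ)
    v = Knn_diag[:, None] - np.sum(A * Knm, 1)[:, None] \
        + np.einsum('nm,mlk,nl->nk', A, S, A)              # diag(A S_k A^T) per output
    return mu, v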

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior from .posterior import Posterior
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
from ...util import diag from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
@ -170,7 +170,7 @@ class VarDTC(LatentFunctionInference):
if VVT_factor.shape[1] == Y.shape[1]: if VVT_factor.shape[1] == Y.shape[1]:
woodbury_vector = Cpsi1Vf # == Cpsi1V woodbury_vector = Cpsi1Vf # == Cpsi1V
else: else:
print 'foobar' print('foobar')
import ipdb; ipdb.set_trace() import ipdb; ipdb.set_trace()
psi1V = np.dot(Y.T*beta, psi1).T psi1V = np.dot(Y.T*beta, psi1).T
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
@ -213,7 +213,7 @@ def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf,
dL_dR = None dL_dR = None
elif het_noise: elif het_noise:
if uncertain_inputs: if uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented" raise NotImplementedError("heteroscedatic derivates with uncertain inputs not implemented")
else: else:
#from ...util.linalg import chol_inv #from ...util.linalg import chol_inv
#LBi = chol_inv(LB) #LBi = chol_inv(LB)

View file

@ -1,7 +1,7 @@
# Copyright (c) 2014, GPy authors (see AUTHORS.txt). # Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior from .posterior import Posterior
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdinv from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdinv
from ...util import diag from ...util import diag
from ...core.parameterization.variational import VariationalPosterior from ...core.parameterization.variational import VariationalPosterior
@ -92,7 +92,7 @@ class VarDTC_minibatch(LatentFunctionInference):
psi0_full = 0. psi0_full = 0.
YRY_full = 0. YRY_full = 0.
for n_start in xrange(0,num_data,batchsize): for n_start in range(0,num_data,batchsize):
n_end = min(batchsize+n_start, num_data) n_end = min(batchsize+n_start, num_data)
if batchsize==num_data: if batchsize==num_data:
Y_slice = Y Y_slice = Y
@ -169,11 +169,13 @@ class VarDTC_minibatch(LatentFunctionInference):
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm, maxtries=100) if not np.isfinite(Kmm).all():
print(Kmm)
Lm = jitchol(Kmm)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda, maxtries=100) LL = jitchol(Lambda)
logdet_L = 2.*np.sum(np.log(np.diag(LL))) logdet_L = 2.*np.sum(np.log(np.diag(LL)))
b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0] b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
bbt = np.square(b).sum() bbt = np.square(b).sum()

View file

@ -1 +1 @@
from hmc import HMC from .hmc import HMC

View file

@ -39,7 +39,7 @@ class HMC:
:rtype: numpy.ndarray :rtype: numpy.ndarray
""" """
params = np.empty((num_samples,self.p.size)) params = np.empty((num_samples,self.p.size))
for i in xrange(num_samples): for i in range(num_samples):
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M) self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH() H_old = self._computeH()
theta_old = self.model.optimizer_array.copy() theta_old = self.model.optimizer_array.copy()
@ -59,7 +59,7 @@ class HMC:
return params return params
def _update(self, hmc_iters): def _update(self, hmc_iters):
for i in xrange(hmc_iters): for i in range(hmc_iters):
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients()) self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p) self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients()) self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
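A self-contained sketch (hypothetical names, unit mass matrix) of the leapfrog scheme used in _update above: half step in momentum, full step in position, half step in momentum.

import numpy as np

def leapfrog(theta, p, grad_logp, stepsize, hmc_iters):
    theta, p = theta.copy(), p.copy()
    for _ in range(hmc_iters):
        p += 0.5 * stepsize * grad_logp(theta)   # half step in momentum
        theta += stepsize * p                    # full step in position
        p += 0.5 * stepsize * grad_logp(theta)   # half step in momentum
    return theta, p

# e.g. for a standard normal target: leapfrog(np.zeros(2), np.random.randn(2), lambda x: -x, 0.1, 20)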
@ -82,7 +82,7 @@ class HMC_shortcut:
def sample(self, m_iters=1000, hmc_iters=20): def sample(self, m_iters=1000, hmc_iters=20):
params = np.empty((m_iters,self.p.size)) params = np.empty((m_iters,self.p.size))
for i in xrange(m_iters): for i in range(m_iters):
# sample a stepsize from the uniform distribution # sample a stepsize from the uniform distribution
stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0]) stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M) self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)

View file

@ -9,7 +9,13 @@ import sys
import re import re
import numdifftools as ndt import numdifftools as ndt
import pdb import pdb
import cPickle
try:
#In Python 2, cPickle is faster. It does not exist in Python 3, where the equivalent
#C implementation is used automatically when available
import cPickle as pickle
except ImportError:
import pickle
class Metropolis_Hastings: class Metropolis_Hastings:
@ -40,7 +46,7 @@ class Metropolis_Hastings:
fcurrent = self.model.log_likelihood() + self.model.log_prior() fcurrent = self.model.log_likelihood() + self.model.log_prior()
accepted = np.zeros(Ntotal,dtype=np.bool) accepted = np.zeros(Ntotal,dtype=np.bool)
for it in range(Ntotal): for it in range(Ntotal):
print "sample %d of %d\r"%(it,Ntotal), print("sample %d of %d\r"%(it,Ntotal), end=' ')
sys.stdout.flush() sys.stdout.flush()
prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale) prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
self.model._set_params_transformed(prop) self.model._set_params_transformed(prop)

View file

@ -1,2 +1,2 @@
from scg import SCG from .scg import SCG
from optimization import * from .optimization import *

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012-2014, Max Zwiessele # Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from gradient_descent_update_rules import FletcherReeves, \ from .gradient_descent_update_rules import FletcherReeves, \
PolakRibiere PolakRibiere
from Queue import Empty from Queue import Empty
from multiprocessing import Value from multiprocessing import Value
@ -74,7 +74,7 @@ class _Async_Optimization(Thread):
if self.outq is not None: if self.outq is not None:
self.outq.put(self.SENTINEL) self.outq.put(self.SENTINEL)
if self.messages: if self.messages:
print "" print("")
self.runsignal.clear() self.runsignal.clear()
def run(self, *args, **kwargs): def run(self, *args, **kwargs):
@ -213,7 +213,7 @@ class Async_Optimize(object):
# # print "^C" # # print "^C"
# self.runsignal.clear() # self.runsignal.clear()
# c.join() # c.join()
print "WARNING: callback still running, optimisation done!" print("WARNING: callback still running, optimisation done!")
return p.result return p.result
class CGD(Async_Optimize): class CGD(Async_Optimize):

View file

@ -10,7 +10,7 @@ try:
rasm_available = True rasm_available = True
except ImportError: except ImportError:
rasm_available = False rasm_available = False
from scg import SCG from .scg import SCG
class Optimizer(): class Optimizer():
""" """
@ -54,7 +54,7 @@ class Optimizer():
self.time = str(end - start) self.time = str(end - start)
def opt(self, f_fp=None, f=None, fp=None): def opt(self, f_fp=None, f=None, fp=None):
raise NotImplementedError, "this needs to be implemented to use the optimizer class" raise NotImplementedError("this needs to be implemented to use the optimizer class")
def plot(self): def plot(self):
""" """
@ -125,9 +125,9 @@ class opt_lbfgsb(Optimizer):
opt_dict = {} opt_dict = {}
if self.xtol is not None: if self.xtol is not None:
print "WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it" print("WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it")
if self.ftol is not None: if self.ftol is not None:
print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it" print("WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it")
if self.gtol is not None: if self.gtol is not None:
opt_dict['pgtol'] = self.gtol opt_dict['pgtol'] = self.gtol
if self.bfgs_factor is not None: if self.bfgs_factor is not None:
@ -140,6 +140,10 @@ class opt_lbfgsb(Optimizer):
self.funct_eval = opt_result[2]['funcalls'] self.funct_eval = opt_result[2]['funcalls']
self.status = rcstrings[opt_result[2]['warnflag']] self.status = rcstrings[opt_result[2]['warnflag']]
#a more helpful error message is available in opt_result in the Error case
if opt_result[2]['warnflag']==2:
self.status = 'Error' + opt_result[2]['task']
class opt_simplex(Optimizer): class opt_simplex(Optimizer):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs) Optimizer.__init__(self, *args, **kwargs)
@ -158,7 +162,7 @@ class opt_simplex(Optimizer):
if self.ftol is not None: if self.ftol is not None:
opt_dict['ftol'] = self.ftol opt_dict['ftol'] = self.ftol
if self.gtol is not None: if self.gtol is not None:
print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it" print("WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it")
opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages, opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages,
maxfun=self.max_f_eval, full_output=True, **opt_dict) maxfun=self.max_f_eval, full_output=True, **opt_dict)
@ -186,11 +190,11 @@ class opt_rasm(Optimizer):
opt_dict = {} opt_dict = {}
if self.xtol is not None: if self.xtol is not None:
print "WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it" print("WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it")
if self.ftol is not None: if self.ftol is not None:
print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it" print("WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it")
if self.gtol is not None: if self.gtol is not None:
print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it" print("WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it")
opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages, opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages,
maxnumfuneval=self.max_f_eval) maxnumfuneval=self.max_f_eval)

View file

@ -21,14 +21,13 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE. # POSSIBILITY OF SUCH DAMAGE.
from __future__ import print_function
import numpy as np import numpy as np
import sys import sys
def print_out(len_maxiters, fnow, current_grad, beta, iteration): def print_out(len_maxiters, fnow, current_grad, beta, iteration):
print '\r', print('\r', end=' ')
print '{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r', print('{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush() sys.stdout.flush()
def exponents(fnow, current_grad): def exponents(fnow, current_grad):
@ -80,7 +79,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
len_maxiters = len(str(maxiters)) len_maxiters = len(str(maxiters))
if display: if display:
print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters) print(' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters))
exps = exponents(fnow, current_grad) exps = exponents(fnow, current_grad)
p_iter = iteration p_iter = iteration
@ -140,7 +139,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
b = np.any(n_exps < exps) b = np.any(n_exps < exps)
if a or b: if a or b:
p_iter = iteration p_iter = iteration
print '' print('')
if b: if b:
exps = n_exps exps = n_exps
@ -189,6 +188,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
if display: if display:
print_out(len_maxiters, fnow, current_grad, beta, iteration) print_out(len_maxiters, fnow, current_grad, beta, iteration)
print "" print("")
print status print(status)
return x, flog, function_eval, status return x, flog, function_eval, status

View file

@ -30,7 +30,7 @@ class SparseGPMissing(StochasticStorage):
Thus, we can just make sure the loop goes over self.d every Thus, we can just make sure the loop goes over self.d every
time. time.
""" """
self.d = xrange(model.Y_normalized.shape[1]) self.d = range(model.Y_normalized.shape[1])
class SparseGPStochastics(StochasticStorage): class SparseGPStochastics(StochasticStorage):
""" """

View file

@ -1,20 +1,23 @@
from _src.kern import Kern from ._src.kern import Kern
from _src.rbf import RBF from ._src.rbf import RBF
from _src.linear import Linear, LinearFull from ._src.linear import Linear, LinearFull
from _src.static import Bias, White, Fixed from ._src.static import Bias, White, Fixed
from _src.brownian import Brownian from ._src.brownian import Brownian
from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine from ._src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
from _src.mlp import MLP from ._src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from ._src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs, Hierarchical from ._src.independent_outputs import IndependentOutputs, Hierarchical
from _src.coregionalize import Coregionalize from ._src.coregionalize import Coregionalize
from _src.ODE_UY import ODE_UY from ._src.ODE_UY import ODE_UY
from _src.ODE_UYC import ODE_UYC from ._src.ODE_UYC import ODE_UYC
from _src.ODE_st import ODE_st from ._src.ODE_st import ODE_st
from _src.ODE_t import ODE_t from ._src.ODE_t import ODE_t
from _src.poly import Poly from ._src.poly import Poly
from _src.eq_ode2 import EQ_ODE2 from ._src.eq_ode2 import EQ_ODE2
from ._src.trunclinear import TruncLinear,TruncLinear_inf
from ._src.splitKern import SplitKern,DEtime
from ._src.splitKern import DEtime as DiffGenomeKern
from _src.trunclinear import TruncLinear,TruncLinear_inf
from _src.splitKern import SplitKern,DiffGenomeKern from _src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel

View file

@ -1,11 +1,11 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt). # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np
from independent_outputs import index_to_slices from .independent_outputs import index_to_slices
class ODE_UY(Kern): class ODE_UY(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'): def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'):
@ -114,7 +114,7 @@ class ODE_UY(Kern):
elif i==1: elif i==1:
Kdiag[s1]+= Vu*Vy*(k1+k2+k3) Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else: else:
raise ValueError, "invalid input/output index" raise ValueError("invalid input/output index")
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag #Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag #Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
return Kdiag return Kdiag

View file

@ -1,11 +1,11 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt). # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np
from independent_outputs import index_to_slices from .independent_outputs import index_to_slices
class ODE_UYC(Kern): class ODE_UYC(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'): def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'):
@ -115,7 +115,7 @@ class ODE_UYC(Kern):
elif i==1: elif i==1:
Kdiag[s1]+= Vu*Vy*(k1+k2+k3) Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else: else:
raise ValueError, "invalid input/output index" raise ValueError("invalid input/output index")
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag #Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag #Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
return Kdiag return Kdiag

View file

@ -1,10 +1,10 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np
from independent_outputs import index_to_slices from .independent_outputs import index_to_slices
class ODE_st(Kern): class ODE_st(Kern):
@ -135,7 +135,7 @@ class ODE_st(Kern):
Kdiag[s1]+= b**2*k1 - 2*a*c*k2 + a**2*k3 + c**2*vyt*vyx Kdiag[s1]+= b**2*k1 - 2*a*c*k2 + a**2*k3 + c**2*vyt*vyx
#Kdiag[s1]+= Vu*Vy*(k1+k2+k3) #Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else: else:
raise ValueError, "invalid input/output index" raise ValueError("invalid input/output index")
return Kdiag return Kdiag

View file

@ -1,8 +1,8 @@
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np
from independent_outputs import index_to_slices from .independent_outputs import index_to_slices
class ODE_t(Kern): class ODE_t(Kern):
@ -85,7 +85,7 @@ class ODE_t(Kern):
Kdiag[s1]+= k1 + vyt+self.ubias Kdiag[s1]+= k1 + vyt+self.ubias
#Kdiag[s1]+= Vu*Vy*(k1+k2+k3) #Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else: else:
raise ValueError, "invalid input/output index" raise ValueError("invalid input/output index")
return Kdiag return Kdiag

View file

@ -4,7 +4,8 @@
import numpy as np import numpy as np
import itertools import itertools
from ...util.caching import Cache_this from ...util.caching import Cache_this
from kern import CombinationKernel from .kern import CombinationKernel
from functools import reduce
class Add(CombinationKernel): class Add(CombinationKernel):
""" """
@ -84,10 +85,10 @@ class Add(CombinationKernel):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts)) psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2 #return psi2
# compute the "cross" terms # compute the "cross" terms
from static import White, Bias from .static import White, Bias
from rbf import RBF from .rbf import RBF
#from rbf_inv import RBFInv #from rbf_inv import RBFInv
from linear import Linear from .linear import Linear
#ffrom fixed import Fixed #ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2): for p1, p2 in itertools.combinations(self.parts, 2):
@ -111,11 +112,11 @@ class Add(CombinationKernel):
psi2 += np.einsum('nm,no->mo',tmp1,tmp2)+np.einsum('nm,no->mo',tmp2,tmp1) psi2 += np.einsum('nm,no->mo',tmp1,tmp2)+np.einsum('nm,no->mo',tmp2,tmp1)
#(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :]) #(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else: else:
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError("psi2 cannot be computed for this kernel")
return psi2 return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias from .static import White, Bias
for p1 in self.parts: for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2! #compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy() eff_dL_dpsi1 = dL_dpsi1.copy()
@ -131,7 +132,7 @@ class Add(CombinationKernel):
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias from .static import White, Bias
target = np.zeros(Z.shape) target = np.zeros(Z.shape)
for p1 in self.parts: for p1 in self.parts:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
@ -149,7 +150,7 @@ class Add(CombinationKernel):
return target return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias from .static import White, Bias
target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters] target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
for p1 in self.parameters: for p1 in self.parameters:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2! #compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
@ -164,7 +165,7 @@ class Add(CombinationKernel):
else: else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior) grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
[np.add(target_grads[i],grads[i],target_grads[i]) for i in xrange(len(grads))] [np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
return target_grads return target_grads
def add(self, other): def add(self, other):
@ -180,9 +181,12 @@ class Add(CombinationKernel):
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
if summarize: if summarize:
return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts]) i_s = np.zeros((self.input_dim))
for k in self.parts:
i_s[k.active_dims] += k.input_sensitivity(summarize)
return i_s
else: else:
i_s = np.zeros((len(self.parts), self.input_dim)) i_s = np.zeros((len(self.parts), self.input_dim))
from operator import setitem from operator import setitem
[setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)] [setitem(i_s, (i, k.active_dims), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
return i_s return i_s

View file

@ -0,0 +1,183 @@
# Copyright (c) 2012, Max Zwiessele (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from .kern import Kern
from ...core.parameterization.param import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from ...util.caching import Cache_this
from ...util.linalg import tdot, mdot
class BasisFuncKernel(Kern):
def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
"""
Abstract superclass for kernels with explicit basis functions for use in GPy.
This class does NOT automatically add an offset to the design matrix phi!
"""
super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
self.ARD = ARD
if self.ARD:
phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
variance = variance * np.ones(phi_test.shape[1])
else:
variance = np.array(variance)
self.variance = Param('variance', variance, Logexp())
self.link_parameter(self.variance)
def parameters_changed(self):
self.alpha = np.sqrt(self.variance)
self.beta = 1./self.variance
@Cache_this(limit=3, ignore_args=())
def phi(self, X):
return self._phi(X)
def _phi(self, X):
raise NotImplementedError('Override this _phi function; it maps the input X into the higher-dimensional feature space and returns the design matrix Phi')
def K(self, X, X2=None):
return self._K(X, X2)
def Kdiag(self, X, X2=None):
return np.diag(self._K(X, X2))
def update_gradients_full(self, dL_dK, X, X2=None):
if self.ARD:
phi1 = self.phi(X)
if X2 is None or X is X2:
self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1)
else:
phi2 = self.phi(X2)
self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
else:
self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta
def update_gradients_diag(self, dL_dKdiag, X):
if self.ARD:
phi1 = self.phi(X)
self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
else:
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
def concatenate_offset(self, X):
return np.c_[np.ones((X.shape[0], 1)), X]
def posterior_inf(self, X=None, posterior=None):
"""
Do the posterior inference on the parameters given this kernel's functions
and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model.
If not given, we search for the highest parent that is a model (which contains the posterior) and take X from it accordingly.
"""
if X is None:
try:
X = self._highest_parent_.X
except NameError:
raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
if posterior is None:
try:
posterior = self._highest_parent_.posterior
except NameError:
raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
phi_alpha = self.phi(X) * self.variance
return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
@Cache_this(limit=3, ignore_args=())
def _K(self, X, X2):
if X2 is None or X is X2:
phi = self.phi(X) * self.alpha
if phi.ndim != 2:
phi = phi[:, None]
return tdot(phi)
else:
phi1 = self.phi(X) * self.alpha
phi2 = self.phi(X2) * self.alpha
if phi1.ndim != 2:
phi1 = phi1[:, None]
phi2 = phi2[:, None]
return phi1.dot(phi2.T)
class LinearSlopeBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
"""
A linear segment transformation: each segment is constant up to start,
linear from start to stop, and constant again afterwards. The segments are
normalized so that they have exactly as much mass above
as below the origin.
start and stop can be tuples or lists of starts and stops.
The boundary behaviour follows np.where(X < start).
"""
self.start = np.array(start)
self.stop = np.array(stop)
super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=())
def _phi(self, X):
phi = np.where(X < self.start, self.start, X)
phi = np.where(phi > self.stop, self.stop, phi)
return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
class ChangePointBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
self.changepoint = np.array(changepoint)
super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=())
def _phi(self, X):
return np.where((X < self.changepoint), -1, 1)
class DomainKernel(LinearSlopeBasisFuncKernel):
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=())
def _phi(self, X):
phi = np.where((X>self.start)*(X<self.stop), 1, 0)
return phi#((phi-self.start)/(self.stop-self.start))-.5
class LogisticBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, centers, variance=1., slope=1., active_dims=None, ARD=False, ARD_slope=True, name='logistic'):
self.centers = np.atleast_2d(centers)
self.ARD_slope = ARD_slope
if self.ARD_slope:
self.slope = Param('slope', slope * np.ones(self.centers.size), Logexp())
else:
self.slope = Param('slope', slope, Logexp())
super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
self.link_parameter(self.slope)
@Cache_this(limit=3, ignore_args=())
def _phi(self, X):
import scipy as sp
phi = 1/(1+np.exp(-((X-self.centers)*self.slope)))
return np.where(np.isnan(phi), 0, phi)#((phi-self.start)/(self.stop-self.start))-.5
def parameters_changed(self):
BasisFuncKernel.parameters_changed(self)
def update_gradients_full(self, dL_dK, X, X2=None):
super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
if X2 is None or X is X2:
phi1 = self.phi(X)
if phi1.ndim != 2:
phi1 = phi1[:, None]
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
if self.ARD_slope:
self.slope.gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_dl)
else:
self.slope.gradient = self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum()
else:
phi1 = self.phi(X)
phi2 = self.phi(X2)
if phi1.ndim != 2:
phi1 = phi1[:, None]
phi2 = phi2[:, None]
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
dphi2_dl = (phi2**2) * (np.exp(-((X2-self.centers)*self.slope)) * (X2-self.centers))
if self.ARD_slope:
self.slope.gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_dl) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_dl))
else:
self.slope.gradient = self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum()
self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
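The classes above all implement the explicit-basis-function identity k(x, x') = phi(x) diag(variance) phi(x')^T, i.e. a degenerate GP equivalent to Bayesian linear regression on the features phi. A minimal sketch (hypothetical names) of that identity, using a change-point style feature like ChangePointBasisFuncKernel._phi:

import numpy as np

def basis_func_K(phi, X, X2, variance):
    P1 = phi(X) * np.sqrt(variance)    # alpha = sqrt(variance), as in BasisFuncKernel
    P2 = phi(X2) * np.sqrt(variance)
    return P1.dot(P2.T)

# phi = lambda X: np.where(X < 2.5, -1., 1.)   # a single change-point feature
# K = basis_func_K(phi, np.linspace(0, 5, 10)[:, None], np.linspace(0, 5, 10)[:, None], 1.)

Given a fitted GPy model m whose kernel contains such a part, posterior_inf above would then return the posterior mean and covariance of the corresponding basis-function weights.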

View file

@ -1,7 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np

View file

@ -1,12 +1,12 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade # Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from .kern import Kern
import numpy as np import numpy as np
from scipy import weave
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assesing whether to use weave from ...util.config import config # for assesing whether to use cython
import coregionalize_cython
class Coregionalize(Kern): class Coregionalize(Kern):
""" """
@ -57,13 +57,8 @@ class Coregionalize(Kern):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa) self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
def K(self, X, X2=None): def K(self, X, X2=None):
if config.getboolean('weave', 'working'): if config.getboolean('cython', 'working'):
try: return self._K_cython(X, X2)
return self._K_weave(X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self._K_numpy(X, X2)
else: else:
return self._K_numpy(X, X2) return self._K_numpy(X, X2)
@ -76,36 +71,10 @@ class Coregionalize(Kern):
index2 = np.asarray(X2, dtype=np.int) index2 = np.asarray(X2, dtype=np.int)
return self.B[index,index2.T] return self.B[index,index2.T]
def _K_weave(self, X, X2=None): def _K_cython(self, X, X2=None):
"""compute the kernel function using scipy.weave"""
index = np.asarray(X, dtype=np.int)
if X2 is None: if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64) return coregionalize_cython.K_symmetric(self.B, np.asarray(X, dtype=np.int64)[:,0])
code=""" return coregionalize_cython.K_asymmetric(self.B, np.asarray(X, dtype=np.int64)[:,0], np.asarray(X2, dtype=np.int64)[:,0])
for(int i=0;i<N; i++){
target[i+i*N] = B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] = B[index[i]+output_dim*index[j]];
target[i+j*N] = target[j+i*N];
}
}
"""
N, B, output_dim = index.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
else:
index2 = np.asarray(X2, dtype=np.int)
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
}
}
"""
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
return target
def Kdiag(self, X): def Kdiag(self, X):
@ -118,19 +87,13 @@ class Coregionalize(Kern):
else: else:
index2 = np.asarray(X2, dtype=np.int) index2 = np.asarray(X2, dtype=np.int)
#attempt to use weave for a nasty double indexing loop: fall back to numpy #attempt to use cython for a nasty double indexing loop: fall back to numpy
if config.getboolean('weave', 'working'): if config.getboolean('cython', 'working'):
try: dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2)
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
else: else:
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2) dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small) dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0) dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
@ -138,19 +101,6 @@ class Coregionalize(Kern):
self.W.gradient = dW self.W.gradient = dW
self.kappa.gradient = dkappa self.kappa.gradient = dkappa
def _gradient_reduce_weave(self, dL_dK, index, index2):
dL_dK_small = np.zeros_like(self.B)
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
return dL_dK_small
def _gradient_reduce_numpy(self, dL_dK, index, index2): def _gradient_reduce_numpy(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0] index, index2 = index[:,0], index2[:,0]
dL_dK_small = np.zeros_like(self.B) dL_dK_small = np.zeros_like(self.B)
@ -160,9 +110,14 @@ class Coregionalize(Kern):
dL_dK_small[j,i] = tmp1[:,index2==j].sum() dL_dK_small[j,i] = tmp1[:,index2==j].sum()
return dL_dK_small return dL_dK_small
def _gradient_reduce_cython(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0]
return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten() index = np.asarray(X, dtype=np.int).flatten()
dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in xrange(self.output_dim)]) dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)])
self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None] self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
self.kappa.gradient = dL_dKdiag_small self.kappa.gradient = dL_dKdiag_small

File diff suppressed because it is too large

View file

@ -0,0 +1,34 @@
#cython: boundscheck=True
#cython: wraparound=True
import cython
import numpy as np
cimport numpy as np
def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
cdef int N = X.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
for n in range(N):
for m in range(N):
K[n,m] = B[X[n],X[m]]
return K
def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
cdef int N = X.size
cdef int M = X2.size
cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
for n in range(N):
for m in range(M):
K[n,m] = B[X[n],X2[m]]
return K
def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
cdef int N = index.size
cdef int M = index2.size
for i in range(N):
for j in range(M):
dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
return dL_dK_small
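
As a side note, the K_symmetric and K_asymmetric loops above are equivalent to NumPy fancy indexing of B; a small pure-NumPy sketch (toy shapes, not the compiled module):

import numpy as np

rng = np.random.RandomState(1)
B = rng.randn(3, 3)
X = rng.randint(0, 3, size=6)
X2 = rng.randint(0, 3, size=4)

K_sym = B[np.ix_(X, X)]     # K[n, m] = B[X[n], X[m]], as in K_symmetric
K_asym = B[np.ix_(X, X2)]   # K[n, m] = B[X[n], X2[m]], as in K_asymmetric
assert K_sym.shape == (6, 6) and K_asym.shape == (6, 4)
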

View file

@ -3,7 +3,7 @@
import numpy as np import numpy as np
from scipy.special import wofz from scipy.special import wofz
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this from ...util.caching import Cache_this

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern, CombinationKernel from .kern import Kern, CombinationKernel
import numpy as np import numpy as np
import itertools import itertools
@ -94,14 +94,18 @@ class IndependentOutputs(CombinationKernel):
else: else:
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))] [[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
if self.single_kern: kern.gradient = target if self.single_kern:
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] self.kern.gradient = target
else:
[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
def gradients_X(self,dL_dK, X, X2=None): def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None: if X2 is None:
# TODO: make use of index_to_slices # TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out
print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
values = np.unique(X[:,self.index_dim]) values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values] slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None)) [target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
@ -142,7 +146,7 @@ class IndependentOutputs(CombinationKernel):
if self.single_kern: target[:] += kern.gradient if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient else: target[i][:] += kern.gradient
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))] [[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))]
if self.single_kern: kern.gradient = target if self.single_kern: self.kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))] else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
class Hierarchical(CombinationKernel): class Hierarchical(CombinationKernel):

View file

@ -4,17 +4,20 @@
import sys import sys
import numpy as np import numpy as np
from ...core.parameterization.parameterized import Parameterized from ...core.parameterization.parameterized import Parameterized
from kernel_slice_operations import KernCallsViaSlicerMeta from .kernel_slice_operations import KernCallsViaSlicerMeta
from ...util.caching import Cache_this from ...util.caching import Cache_this
from GPy.core.parameterization.observable_array import ObsAr from GPy.core.parameterization.observable_array import ObsAr
from functools import reduce
import six
@six.add_metaclass(KernCallsViaSlicerMeta)
class Kern(Parameterized): class Kern(Parameterized):
#=========================================================================== #===========================================================================
# This adds input slice support. The rather ugly code for slicing can be # This adds input slice support. The rather ugly code for slicing can be
# found in kernel_slice_operations # found in kernel_slice_operations
    __metaclass__ = KernCallsViaSlicerMeta # __metaclass__ is ignored in Python 3 - the metaclass has to be supplied in the class definition instead
#__metaclass__ = KernCallsViaSlicerMeta
#Here, we use the Python module six to support Py3 and Py2 simultaneously
#=========================================================================== #===========================================================================
_support_GPU=False _support_GPU=False
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw): def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
@ -178,7 +181,7 @@ class Kern(Parameterized):
""" """
assert isinstance(other, Kern), "only kernels can be added to kernels..." assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add from .add import Add
return Add([self, other], name=name) return Add([self, other], name=name)
def __mul__(self, other): def __mul__(self, other):
@ -210,7 +213,7 @@ class Kern(Parameterized):
""" """
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..." assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
from prod import Prod from .prod import Prod
#kernels = [] #kernels = []
#if isinstance(self, Prod): kernels.extend(self.parameters) #if isinstance(self, Prod): kernels.extend(self.parameters)
#else: kernels.append(self) #else: kernels.append(self)

View file

@ -3,7 +3,7 @@
import numpy as np import numpy as np
from kern import Kern from .kern import Kern
from ...util.linalg import tdot from ...util.linalg import tdot
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp

View file

@ -1,7 +1,7 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt). # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np import numpy as np

View file

@ -3,11 +3,12 @@
import numpy as np import numpy as np
from kern import Kern from .kern import Kern
from ...util.linalg import mdot from ...util.linalg import mdot
from ...util.decorators import silence_errors from ...util.decorators import silence_errors
from ...core.parameterization.param import Param from ...core.parameterization.param import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from functools import reduce
class Periodic(Kern): class Periodic(Kern):
def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name): def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name):
@ -67,8 +68,6 @@ class Periodic(Kern):
return np.diag(self.K(X)) return np.diag(self.K(X))
class PeriodicExponential(Periodic): class PeriodicExponential(Periodic):
""" """
    Kernel of the periodic subspace (up to a given frequency) of an exponential     Kernel of the periodic subspace (up to a given frequency) of an exponential

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
class Poly(Kern): class Poly(Kern):

View file

@ -2,9 +2,24 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from kern import CombinationKernel from .kern import CombinationKernel
from ...util.caching import Cache_this from ...util.caching import Cache_this
import itertools import itertools
from functools import reduce
def numpy_invalid_op_as_exception(func):
"""
A decorator that allows catching numpy invalid operations
as exceptions (the default behaviour is raising warnings).
"""
def func_wrapper(*args, **kwargs):
np.seterr(invalid='raise')
result = func(*args, **kwargs)
np.seterr(invalid='warn')
return result
return func_wrapper
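
A minimal usage sketch of the decorator defined above (normalise is a hypothetical function used only for illustration; the decorator is repeated so the snippet runs on its own): any NumPy 'invalid value' condition inside the wrapped call now raises FloatingPointError instead of emitting a RuntimeWarning.

import numpy as np

def numpy_invalid_op_as_exception(func):
    # same pattern as the decorator above
    def func_wrapper(*args, **kwargs):
        np.seterr(invalid='raise')
        result = func(*args, **kwargs)
        np.seterr(invalid='warn')
        return result
    return func_wrapper

@numpy_invalid_op_as_exception
def normalise(x):
    # 0/0 below triggers an 'invalid' floating point condition
    return x / x.sum()

try:
    normalise(np.zeros(3))
except FloatingPointError as err:
    print('caught invalid operation:', err)
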
class Prod(CombinationKernel): class Prod(CombinationKernel):
""" """
@ -46,18 +61,20 @@ class Prod(CombinationKernel):
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2) self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2) self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2)
else: else:
k = self.K(X,X2)*dL_dK for combination in itertools.combinations(self.parts, len(self.parts) - 1):
for p in self.parts: prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
p.update_gradients_full(k/p.K(X,X2),X,X2) to_update = list(set(self.parts) - set(combination))[0]
to_update.update_gradients_full(dL_dK * prod, X, X2)
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
if len(self.parts)==2: if len(self.parts)==2:
self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X) self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X) self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
else: else:
k = self.Kdiag(X)*dL_dKdiag for combination in itertools.combinations(self.parts, len(self.parts) - 1):
for p in self.parts: prod = reduce(np.multiply, [p.Kdiag(X) for p in combination])
p.update_gradients_diag(k/p.Kdiag(X),X) to_update = list(set(self.parts) - set(combination))[0]
to_update.update_gradients_diag(dL_dKdiag * prod, X)
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape) target = np.zeros(X.shape)
@ -65,9 +82,10 @@ class Prod(CombinationKernel):
target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2) target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2)
target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2) target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2)
else: else:
k = self.K(X,X2)*dL_dK for combination in itertools.combinations(self.parts, len(self.parts) - 1):
for p in self.parts: prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
target += p.gradients_X(k/p.K(X,X2),X,X2) to_update = list(set(self.parts) - set(combination))[0]
target += to_update.gradients_X(dL_dK * prod, X, X2)
return target return target
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
@ -80,3 +98,5 @@ class Prod(CombinationKernel):
for p in self.parts: for p in self.parts:
target += p.gradients_X_diag(k/p.Kdiag(X),X) target += p.gradients_X_diag(k/p.Kdiag(X),X)
return target return target

View file

@ -4,10 +4,10 @@
from ....core.parameterization.parameter_core import Pickleable from ....core.parameterization.parameter_core import Pickleable
from GPy.util.caching import Cache_this from GPy.util.caching import Cache_this
from ....core.parameterization import variational from ....core.parameterization import variational
import rbf_psi_comp from . import rbf_psi_comp
import ssrbf_psi_comp from . import ssrbf_psi_comp
import sslinear_psi_comp from . import sslinear_psi_comp
import linear_psi_comp from . import linear_psi_comp
class PSICOMP_RBF(Pickleable): class PSICOMP_RBF(Pickleable):
@Cache_this(limit=2, ignore_args=(0,)) @Cache_this(limit=2, ignore_args=(0,))
@ -17,7 +17,7 @@ class PSICOMP_RBF(Pickleable):
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior) return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError("unknown distriubtion received for psi-statistics")
@Cache_this(limit=2, ignore_args=(0,1,2,3)) @Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
@ -26,7 +26,7 @@ class PSICOMP_RBF(Pickleable):
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior) return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError("unknown distriubtion received for psi-statistics")
def _setup_observers(self): def _setup_observers(self):
pass pass
@ -40,7 +40,7 @@ class PSICOMP_Linear(Pickleable):
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior) return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError("unknown distriubtion received for psi-statistics")
@Cache_this(limit=2, ignore_args=(0,1,2,3)) @Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior): def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
@ -49,7 +49,7 @@ class PSICOMP_Linear(Pickleable):
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior): elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior) return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
else: else:
raise ValueError, "unknown distriubtion received for psi-statistics" raise ValueError("unknown distriubtion received for psi-statistics")
def _setup_observers(self): def _setup_observers(self):
pass pass

View file

@ -37,11 +37,11 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati
# Compute for psi0 and psi1 # Compute for psi0 and psi1
mu2S = np.square(mu)+S mu2S = np.square(mu)+S
dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) dL_dvar += (dL_dpsi0[:,None]*gamma*mu2S).sum(axis=0) + (dL_dpsi1.T.dot(gamma*mu)*Z).sum(axis=0)
dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu) dL_dgamma += dL_dpsi0[:,None]*variance*mu2S+ dL_dpsi1.dot(Z)*mu*variance
dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z) dL_dmu += dL_dpsi0[:,None]*2.*variance*gamma*mu + dL_dpsi1.dot(Z)*gamma*variance
dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance) dL_dS += dL_dpsi0[:,None]*variance*gamma
dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu) dL_dZ += dL_dpsi1.T.dot(gamma*mu)*variance
return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma
@ -64,29 +64,23 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
gamma2 = np.square(gamma) gamma2 = np.square(gamma)
variance2 = np.square(variance) variance2 = np.square(variance)
mu2S = mu2+S # NxQ mu2S = mu2+S # NxQ
gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance) gvm = gamma*mu*variance
common_sum = np.einsum('nq,mq->nm',gvm,Z) common_sum = gvm.dot(Z.T)
# common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z) Z_expect_var2 = Z_expect*variance2
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z) common_expect = common_sum.dot(dL_dpsi2T).dot(Z)
common_expect = np.einsum('mq,nm->nq',tmp,common_sum) Z2_expect = common_sum.dot(dL_dpsi2T)
# common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum) Z1_expect = dL_dpsi2T.dot(Z)
Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum)
Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z)
dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\ dL_dvar = variance*Z_expect*2.*(gamma*mu2S-gamma2*mu2).sum(axis=0)+(common_expect*gamma*mu).sum(axis=0)
np.einsum('nq,nq,nq->q',common_expect,gamma,mu)
dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\ dL_dgamma = Z_expect_var2*(mu2S-2.*gamma*mu2)+common_expect*mu*variance
np.einsum('nq,q,nq->nq',common_expect,variance,mu)
dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\ dL_dmu = Z_expect_var2*mu*2.*(gamma-gamma2) + common_expect*gamma*variance
np.einsum('nq,nq,q->nq',common_expect,gamma,variance)
dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2) dL_dS = gamma*Z_expect_var2
# dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum)) dL_dZ = (gamma*(mu2S-gamma*mu2)).sum(axis=0)*variance2*Z1_expect+ Z2_expect.T.dot(gamma*mu)*variance
dL_dZ = Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect)
return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ
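
The rewrite above replaces np.einsum contractions with explicit dot products and sums. As a sanity check, one of the identities involved, verified on random toy arrays (shapes assumed from the surrounding code):

import numpy as np

rng = np.random.RandomState(3)
N, M, Q = 5, 4, 3
dL_dpsi1 = rng.randn(N, M)
gamma, mu = rng.rand(N, Q), rng.randn(N, Q)
Z = rng.randn(M, Q)

einsum_form = np.einsum('nm,nq,mq,nq->q', dL_dpsi1, gamma, Z, mu)
dot_form = (dL_dpsi1.T.dot(gamma * mu) * Z).sum(axis=0)
assert np.allclose(einsum_form, dot_form)
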

View file

@ -22,12 +22,14 @@ try:
# _psi1 NxM # _psi1 NxM
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
gamma = variational_posterior.binary_prob
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale) l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1) log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1) log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() log_gamma = np.log(gamma)
log_gamma1 = np.log(1.-gamma)
variance = float(variance) variance = float(variance)
psi0 = np.empty(N) psi0 = np.empty(N)
psi0[:] = variance psi0[:] = variance
@ -37,6 +39,7 @@ try:
from ....util.misc import param_to_array from ....util.misc import param_to_array
S = param_to_array(S) S = param_to_array(S)
mu = param_to_array(mu) mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z) Z = param_to_array(Z)
support_code = """ support_code = """
@ -79,7 +82,7 @@ try:
} }
} }
""" """
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz) weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
psi2 = psi2n.sum(axis=0) psi2 = psi2n.sum(axis=0)
return psi0,psi1,psi2,psi2n return psi0,psi1,psi2,psi2n
@ -94,12 +97,13 @@ try:
mu = variational_posterior.mean mu = variational_posterior.mean
S = variational_posterior.variance S = variational_posterior.variance
gamma = variational_posterior.binary_prob
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1] N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale) l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1) log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1) log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob() log_gamma = np.log(gamma)
gamma, gamma1 = variational_posterior.gamma_probabilities() log_gamma1 = np.log(1.-gamma)
variance = float(variance) variance = float(variance)
dvar = np.zeros(1) dvar = np.zeros(1)
@ -113,6 +117,7 @@ try:
from ....util.misc import param_to_array from ....util.misc import param_to_array
S = param_to_array(S) S = param_to_array(S)
mu = param_to_array(mu) mu = param_to_array(mu)
gamma = param_to_array(gamma)
Z = param_to_array(Z) Z = param_to_array(Z)
support_code = """ support_code = """
@ -130,7 +135,6 @@ try:
double Zm1q = Z(m1,q); double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q); double Zm2q = Z(m2,q);
double gnq = gamma(n,q); double gnq = gamma(n,q);
double g1nq = gamma1(n,q);
double mu_nq = mu(n,q); double mu_nq = mu(n,q);
if(m2==0) { if(m2==0) {
@ -156,7 +160,7 @@ try:
dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum); dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.; dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum; dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum); dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum; dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
} }
@ -184,7 +188,7 @@ try:
dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum; dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum; dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum; dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum; dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum; dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
} }
@ -192,7 +196,7 @@ try:
} }
} }
""" """
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz) weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
dl *= 2.*lengthscale dl *= 2.*lengthscale
if not ARD: if not ARD:

View file

@ -3,9 +3,9 @@
import numpy as np import numpy as np
from stationary import Stationary from .stationary import Stationary
from psi_comp import PSICOMP_RBF from .psi_comp import PSICOMP_RBF
from psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU from .psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
from ...util.config import * from ...util.config import *
class RBF(Stationary): class RBF(Stationary):

View file

@ -3,11 +3,11 @@ A new kernel
""" """
import numpy as np import numpy as np
from kern import Kern,CombinationKernel from .kern import Kern,CombinationKernel
from .independent_outputs import index_to_slices from .independent_outputs import index_to_slices
import itertools import itertools
class DiffGenomeKern(Kern): class DEtime(Kern):
def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'): def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'):
self.idx_p = idx_p self.idx_p = idx_p
@ -104,7 +104,7 @@ class SplitKern(CombinationKernel):
assert len(slices2)<=2, 'The Split kernel only support two different indices' assert len(slices2)<=2, 'The Split kernel only support two different indices'
target = np.zeros((X.shape[0], X2.shape[0])) target = np.zeros((X.shape[0], X2.shape[0]))
# diagonal blocks # diagonal blocks
[[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))] [[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in range(min(len(slices),len(slices2)))]
if len(slices)>1: if len(slices)>1:
[target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[1], slices2[0])] [target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[1], slices2[0])]
if len(slices2)>1: if len(slices2)>1:
@ -135,7 +135,7 @@ class SplitKern(CombinationKernel):
else: else:
assert dL_dK.shape==(X.shape[0],X2.shape[0]) assert dL_dK.shape==(X.shape[0],X2.shape[0])
slices2 = index_to_slices(X2[:,self.index_dim]) slices2 = index_to_slices(X2[:,self.index_dim])
[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))] [[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in range(min(len(slices),len(slices2)))]
if len(slices)>1: if len(slices)>1:
[collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[1], slices2[0])] [collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[1], slices2[0])]
if len(slices2)>1: if len(slices2)>1:

View file

@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from .kern import Kern
import numpy as np import numpy as np
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
@ -60,7 +60,10 @@ class White(Static):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64) return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
self.variance.gradient = np.trace(dL_dK) self.variance.gradient = np.trace(dL_dK)
else:
self.variance.gradient = 0.
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum() self.variance.gradient = dL_dKdiag.sum()
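
The new branch above reflects that a white-noise kernel has K(X, X) = variance * I but zero cross-covariance between distinct input sets, so dK/dvariance is the identity only in the symmetric case. A toy illustration (assumed shapes, not the GPy classes):

import numpy as np

rng = np.random.RandomState(4)
N = 4
dL_dK = rng.randn(N, N)

# symmetric case: dK/dvariance = I, so the gradient reduces to trace(dL_dK)
grad_symmetric = np.sum(dL_dK * np.eye(N))
assert np.isclose(grad_symmetric, np.trace(dL_dK))

# cross-covariance case: K(X, X2) = 0 identically, so the variance gradient is 0
grad_cross = 0.0
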
@ -106,7 +109,7 @@ class Fixed(Static):
return self.variance * self.fixed_K return self.variance * self.fixed_K
def Kdiag(self, X): def Kdiag(self, X):
return self.variance * self.fixed_K.diag() return self.variance * self.fixed_K.diagonal()
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K) self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)

View file

@ -2,16 +2,23 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.linalg import tdot from ...util.linalg import tdot
from ... import util from ... import util
import numpy as np import numpy as np
from scipy import integrate, weave from scipy import integrate
from ...util.config import config # for assessing whether to use weave from ...util.config import config # for assessing whether to use cython
from ...util.caching import Cache_this from ...util.caching import Cache_this
try:
import stationary_cython
except ImportError:
print('warning: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false')
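
A minimal, self-contained sketch of the optional-extension pattern used above ('fast_ops' is a hypothetical module name standing in for stationary_cython; the real code records the outcome in GPy's config under the 'cython' section):

import numpy as np

try:
    import fast_ops                       # compiled extension; absent on most systems
    _CYTHON_WORKING = True
except ImportError:
    print('warning: failed to import cython module: falling back to numpy')
    _CYTHON_WORKING = False

def lengthscale_grads(tmp, X, X2, lengthscale):
    if _CYTHON_WORKING:
        grads = np.zeros(X.shape[1])
        fast_ops.lengthscale_grads(tmp.shape[0], tmp.shape[1], X.shape[1], tmp, X, X2, grads)
    else:
        # pure NumPy fallback, mirroring _lengthscale_grads_pure below
        grads = np.array([np.sum(tmp * np.square(X[:, q:q+1] - X2[:, q:q+1].T))
                          for q in range(X.shape[1])])
    return -grads / lengthscale**3
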
class Stationary(Kern): class Stationary(Kern):
""" """
Stationary kernels (covariance functions). Stationary kernels (covariance functions).
@ -65,10 +72,10 @@ class Stationary(Kern):
self.link_parameters(self.variance, self.lengthscale) self.link_parameters(self.variance, self.lengthscale)
def K_of_r(self, r): def K_of_r(self, r):
raise NotImplementedError, "implement the covariance function as a fn of r to use this class" raise NotImplementedError("implement the covariance function as a fn of r to use this class")
def dK_dr(self, r): def dK_dr(self, r):
raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class" raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
@Cache_this(limit=5, ignore_args=()) @Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
@ -148,28 +155,18 @@ class Stationary(Kern):
(dL_dK), compute the gradient wrt the parameters of this kernel, (dL_dK), compute the gradient wrt the parameters of this kernel,
and store in the parameters object as e.g. self.variance.gradient and store in the parameters object as e.g. self.variance.gradient
""" """
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance) self.variance.gradient = np.sum(self.K(X, X2)* dL_dK)/self.variance
#now the lengthscale gradient(s) #now the lengthscale gradient(s)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
if self.ARD: if self.ARD:
#rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
#d = X[:, None, :] - X2[None, :, :]
#x_xl3 = np.square(d)
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2) tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X if X2 is None: X2 = X
if config.getboolean('cython', 'working'):
self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2)
if config.getboolean('weave', 'working'):
try:
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else: else:
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)]) self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2)
else: else:
r = self._scaled_dist(X, X2) r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
@ -184,43 +181,27 @@ class Stationary(Kern):
dist = self._scaled_dist(X, X2).copy() dist = self._scaled_dist(X, X2).copy()
return 1./np.where(dist != 0., dist, np.inf) return 1./np.where(dist != 0., dist, np.inf)
def weave_lengthscale_grads(self, tmp, X, X2): def _lengthscale_grads_pure(self, tmp, X, X2):
"""Use scipy.weave to compute derivatives wrt the lengthscales""" return -np.array([np.sum(tmp * np.square(X[:,q:q+1] - X2[:,q:q+1].T)) for q in range(self.input_dim)])/self.lengthscale**3
def _lengthscale_grads_cython(self, tmp, X, X2):
N,M = tmp.shape N,M = tmp.shape
Q = X.shape[1] Q = self.input_dim
if hasattr(X, 'values'):X = X.values X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
if hasattr(X2, 'values'):X2 = X2.values
grads = np.zeros(self.input_dim) grads = np.zeros(self.input_dim)
code = """ stationary_cython.lengthscale_grads(N, M, Q, tmp, X, X2, grads)
double gradq;
for(int q=0; q<Q; q++){
gradq = 0;
for(int n=0; n<N; n++){
for(int m=0; m<M; m++){
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
}
}
grads(q) = gradq;
}
"""
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
return -grads/self.lengthscale**3 return -grads/self.lengthscale**3
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
""" """
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
""" """
if config.getboolean('weave', 'working'): if config.getboolean('cython', 'working'):
try: return self._gradients_X_cython(dL_dK, X, X2)
return self.gradients_X_weave(dL_dK, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self.gradients_X_(dL_dK, X, X2)
else: else:
return self.gradients_X_(dL_dK, X, X2) return self._gradients_X_pure(dL_dK, X, X2)
def gradients_X_(self, dL_dK, X, X2=None): def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr tmp = invdist*dL_dr
@ -230,54 +211,25 @@ class Stationary(Kern):
#The high-memory numpy way: #The high-memory numpy way:
#d = X[:, None, :] - X2[None, :, :] #d = X[:, None, :] - X2[None, :, :]
#ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2 #grad = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
#the lower memory way with a loop #the lower memory way with a loop
ret = np.empty(X.shape, dtype=np.float64) grad = np.empty(X.shape, dtype=np.float64)
for q in xrange(self.input_dim): for q in range(self.input_dim):
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q]) np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=grad[:,q])
ret /= self.lengthscale**2 return grad/self.lengthscale**2
return ret def _gradients_X_cython(self, dL_dK, X, X2=None):
def gradients_X_weave(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr tmp = invdist*dL_dr
if X2 is None: if X2 is None:
tmp = tmp + tmp.T tmp = tmp + tmp.T
X2 = X X2 = X
X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
code = """ grad = np.zeros(X.shape)
int n,m,d; stationary_cython.grad_X(X.shape[0], X.shape[1], X2.shape[0], X, X2, tmp, grad)
double retnd; return grad/self.lengthscale**2
#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp(n,m)*(X(n,d)-X2(m,d));
}
ret(n,d) = retnd;
}
}
"""
if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
if hasattr(X2, 'values'):X2 = X2.values
ret = np.zeros(X.shape)
N,D = X.shape
N,M = tmp.shape
from scipy import weave
support_code = """
#include <omp.h>
#include <stdio.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
weave.inline(code, ['ret', 'N', 'D', 'M', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz, support_code=support_code, **weave_options)
return ret/self.lengthscale**2
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)
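
The comments in _gradients_X_pure above contrast a high-memory broadcast with the per-dimension loop actually used. A standalone check that the two give the same result (random toy arrays; the lengthscale values are assumptions):

import numpy as np

rng = np.random.RandomState(5)
N, M, Q = 6, 5, 3
tmp = rng.randn(N, M)                        # stands in for invdist * dL_dr
X, X2 = rng.randn(N, Q), rng.randn(M, Q)
lengthscale = np.array([0.7, 1.3, 2.0])

# high-memory broadcast form (the commented-out variant)
d = X[:, None, :] - X2[None, :, :]
grad_broadcast = np.sum(tmp[:, :, None] * d, 1) / lengthscale**2

# lower-memory loop form, as used by _gradients_X_pure
grad_loop = np.empty_like(X)
for q in range(Q):
    np.sum(tmp * (X[:, q][:, None] - X2[:, q][None, :]), axis=1, out=grad_loop[:, q])
grad_loop /= lengthscale**2
assert np.allclose(grad_broadcast, grad_loop)
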
@ -285,6 +237,9 @@ class Stationary(Kern):
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
return self.variance*np.ones(self.input_dim)/self.lengthscale**2 return self.variance*np.ones(self.input_dim)/self.lengthscale**2
class Exponential(Stationary): class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'): def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
@ -296,6 +251,8 @@ class Exponential(Stationary):
return -0.5*self.K_of_r(r) return -0.5*self.K_of_r(r)
class OU(Stationary): class OU(Stationary):
""" """
OU kernel: OU kernel:

File diff suppressed because it is too large

View file

@ -0,0 +1,36 @@
#cython: boundscheck=False
#cython: wraparound=False
import numpy as np
cimport numpy as np
ctypedef np.float64_t DTYPE_t
cdef extern from "stationary_utils.h":
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
cdef extern from "stationary_utils.h":
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
def grad_X(int N, int D, int M,
np.ndarray[DTYPE_t, ndim=2] _X,
np.ndarray[DTYPE_t, ndim=2] _X2,
np.ndarray[DTYPE_t, ndim=2] _tmp,
np.ndarray[DTYPE_t, ndim=2] _grad):
cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data
cdef double *tmp = <double*> _tmp.data
cdef double *grad = <double*> _grad.data
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
def lengthscale_grads(int N, int M, int Q,
np.ndarray[DTYPE_t, ndim=2] _tmp,
np.ndarray[DTYPE_t, ndim=2] _X,
np.ndarray[DTYPE_t, ndim=2] _X2,
np.ndarray[DTYPE_t, ndim=1] _grad):
cdef double *tmp = <double*> _tmp.data
cdef double *X = <double*> _X.data
cdef double *X2 = <double*> _X2.data
cdef double *grad = <double*> _grad.data
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.

View file

@ -0,0 +1,35 @@
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
int n,m,d;
double retnd;
//#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp[n*M+m]*(X[n*D+d]-X2[m*D+d]);
}
grad[n*D+d] = retnd;
}
}
} //grad_X
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
int n,m,q;
double gradq, dist;
#pragma omp parallel for private(n,m, gradq, dist)
for(q=0; q<Q; q++){
gradq = 0;
for(n=0; n<N; n++){
for(m=0; m<M; m++){
dist = X[n*Q+q]-X2[m*Q+q];
gradq += tmp[n*M+m]*dist*dist;
}
}
grad[q] = gradq;
}
} //lengthscale_grads

View file

@ -0,0 +1,3 @@
#include <omp.h>
void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad);
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad);

View file

@ -1,7 +1,7 @@
# Check Matthew Rocklin's blog post. # Check Matthew Rocklin's blog post.
import sympy as sym import sympy as sym
import numpy as np import numpy as np
from kern import Kern from .kern import Kern
from ...core.symbolic import Symbolic_core from ...core.symbolic import Symbolic_core
@ -11,7 +11,7 @@ class Symbolic(Kern, Symbolic_core):
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]): def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]):
if k is None: if k is None:
raise ValueError, "You must provide an argument for the covariance function." raise ValueError("You must provide an argument for the covariance function.")
Kern.__init__(self, input_dim, active_dims, name=name) Kern.__init__(self, input_dim, active_dims, name=name)
kdiag = k kdiag = k

View file

@ -3,7 +3,7 @@
import numpy as np import numpy as np
from kern import Kern from .kern import Kern
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this from ...util.caching import Cache_this

View file

@ -1,9 +1,10 @@
from bernoulli import Bernoulli from .bernoulli import Bernoulli
from exponential import Exponential from .exponential import Exponential
from gaussian import Gaussian from .gaussian import Gaussian
from gamma import Gamma from .gamma import Gamma
from poisson import Poisson from .poisson import Poisson
from student_t import StudentT from .student_t import StudentT
from likelihood import Likelihood from .likelihood import Likelihood
from mixed_noise import MixedNoise from .mixed_noise import MixedNoise
from binomial import Binomial from .binomial import Binomial

View file

@ -3,9 +3,8 @@
import numpy as np import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions from . import link_functions
from likelihood import Likelihood from .likelihood import Likelihood
from scipy import stats
class Bernoulli(Likelihood): class Bernoulli(Likelihood):
""" """
@ -77,23 +76,22 @@ class Bernoulli(Likelihood):
return Z_hat, mu_hat, sigma2_hat return Z_hat, mu_hat, sigma2_hat
def variational_expectations(self, Y, m, v, gh_points=None): def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit): if isinstance(self.gp_link, link_functions.Probit):
if gh_points is None: if gh_points is None:
gh_x, gh_w = np.polynomial.hermite.hermgauss(20) gh_x, gh_w = self._gh_points()
else: else:
gh_x, gh_w = gh_points gh_x, gh_w = gh_points
from scipy import stats
shape = m.shape shape = m.shape
m,v,Y = m.flatten(), v.flatten(), Y.flatten() m,v,Y = m.flatten(), v.flatten(), Y.flatten()
Ysign = np.where(Y==1,1,-1) Ysign = np.where(Y==1,1,-1)
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None] X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None]
p = stats.norm.cdf(X) p = std_norm_cdf(X)
p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability
N = stats.norm.pdf(X) N = std_norm_pdf(X)
F = np.log(p).dot(gh_w) F = np.log(p).dot(gh_w)
NoverP = N/p NoverP = N/p
dF_dm = (NoverP*Ysign[:,None]).dot(gh_w) dF_dm = (NoverP*Ysign[:,None]).dot(gh_w)
@ -106,10 +104,10 @@ class Bernoulli(Likelihood):
def predictive_mean(self, mu, variance, Y_metadata=None): def predictive_mean(self, mu, variance, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit): if isinstance(self.gp_link, link_functions.Probit):
return stats.norm.cdf(mu/np.sqrt(1+variance)) return std_norm_cdf(mu/np.sqrt(1+variance))
elif isinstance(self.gp_link, link_functions.Heaviside): elif isinstance(self.gp_link, link_functions.Heaviside):
return stats.norm.cdf(mu/np.sqrt(variance)) return std_norm_cdf(mu/np.sqrt(variance))
else: else:
raise NotImplementedError raise NotImplementedError
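
The probit branch of variational_expectations above evaluates E_{f ~ N(m, v)}[log Phi(y f)] by Gauss-Hermite quadrature. A self-contained sketch of that approximation for a single point (toy m, v, y; the 1/sqrt(pi) weight normalisation is written out explicitly here, and the result is checked against brute-force numerical integration):

import numpy as np
from scipy.stats import norm

gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
gh_w = gh_w / np.sqrt(np.pi)            # normalise so the weights sum to 1

m, v, y = 0.3, 0.8, 1                   # latent mean/variance and a label in {-1, +1}
f = gh_x * np.sqrt(2.0 * v) + m * y     # change of variables; the label's sign is folded into the mean
p = np.clip(norm.cdf(f), 1e-9, 1 - 1e-9)
F = np.log(p).dot(gh_w)                 # quadrature estimate of E[log Phi(y*f)]

# brute-force check by numerical integration over a dense grid
grid = np.linspace(-10.0, 10.0, 20001)
dens = norm.pdf(grid, loc=m, scale=np.sqrt(v))
integrand = np.log(np.clip(norm.cdf(y * grid), 1e-9, 1 - 1e-9)) * dens
brute = integrand.sum() * (grid[1] - grid[0])
assert abs(F - brute) < 1e-3
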

View file

@ -3,8 +3,8 @@
import numpy as np import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
import link_functions from . import link_functions
from likelihood import Likelihood from .likelihood import Likelihood
from scipy import special from scipy import special
class Binomial(Likelihood): class Binomial(Likelihood):

View file

@ -5,8 +5,8 @@
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
import link_functions from . import link_functions
from likelihood import Likelihood from .likelihood import Likelihood
class Exponential(Likelihood): class Exponential(Likelihood):
""" """
@ -57,9 +57,8 @@ class Exponential(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
log_objective = np.log(link_f) - y*link_f log_objective = np.log(link_f) - y*link_f
return np.sum(log_objective) return log_objective
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
@ -77,7 +76,6 @@ class Exponential(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = 1./link_f - y grad = 1./link_f - y
#grad = y/(link_f**2) - 1./link_f #grad = y/(link_f**2) - 1./link_f
return grad return grad
@ -103,7 +101,6 @@ class Exponential(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -1./(link_f**2) hess = -1./(link_f**2)
#hess = -2*y/(link_f**3) + 1/(link_f**2) #hess = -2*y/(link_f**3) + 1/(link_f**2)
return hess return hess
@ -123,7 +120,6 @@ class Exponential(Likelihood):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2./(link_f**3) d3lik_dlink3 = 2./(link_f**3)
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3) #d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
return d3lik_dlink3 return d3lik_dlink3

View file

@ -6,8 +6,8 @@ import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
from ..core.parameterization import Param from ..core.parameterization import Param
import link_functions from . import link_functions
from likelihood import Likelihood from .likelihood import Likelihood
class Gamma(Likelihood): class Gamma(Likelihood):
""" """
@ -66,12 +66,11 @@ class Gamma(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
#alpha = self.gp_link.transf(gp)*self.beta #alpha = self.gp_link.transf(gp)*self.beta
#return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha)) #return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
alpha = link_f*self.beta alpha = link_f*self.beta
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
return np.sum(log_objective) return log_objective
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
@ -90,7 +89,6 @@ class Gamma(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
#old #old
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta #return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
@ -118,7 +116,6 @@ class Gamma(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2) hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2)
#old #old
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta #return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
@ -140,6 +137,5 @@ class Gamma(Likelihood):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3) d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
return d3lik_dlink3 return d3lik_dlink3

View file

@ -13,8 +13,8 @@ James 11/12/13
import numpy as np import numpy as np
from scipy import stats, special from scipy import stats, special
import link_functions from . import link_functions
from likelihood import Likelihood from .likelihood import Likelihood
from ..core.parameterization import Param from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp from ..core.parameterization.transformations import Logexp
from scipy import stats from scipy import stats
@ -34,7 +34,9 @@ class Gaussian(Likelihood):
if gp_link is None: if gp_link is None:
gp_link = link_functions.Identity() gp_link = link_functions.Identity()
assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link" if not isinstance(gp_link, link_functions.Identity):
print("Warning, Exact inference is not implemeted for non-identity link functions,\
if you are not already, ensure Laplace inference_method is used")
super(Gaussian, self).__init__(gp_link, name=name) super(Gaussian, self).__init__(gp_link, name=name)
@ -130,11 +132,8 @@ class Gaussian(Likelihood):
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape ln_det_cov = np.log(self.variance)
N = y.shape[0] return -(1.0/(2*self.variance))*((y-link_f)**2) - 0.5*ln_det_cov - 0.5*np.log(2.*np.pi)
ln_det_cov = N*np.log(self.variance)
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
def dlogpdf_dlink(self, link_f, y, Y_metadata=None): def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
""" """
@ -151,8 +150,7 @@ class Gaussian(Likelihood):
:returns: gradient of log likelihood evaluated at points link(f) :returns: gradient of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape s2_i = 1.0/self.variance
s2_i = (1.0/self.variance)
grad = s2_i*y - s2_i*link_f grad = s2_i*y - s2_i*link_f
return grad return grad
@ -178,9 +176,9 @@ class Gaussian(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
N = y.shape[0] N = y.shape[0]
hess = -(1.0/self.variance)*np.ones((N, 1)) D = link_f.shape[1]
hess = -(1.0/self.variance)*np.ones((N, D))
return hess return hess
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None): def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
@ -198,9 +196,9 @@ class Gaussian(Likelihood):
:returns: third derivative of log likelihood evaluated at points link(f) :returns: third derivative of log likelihood evaluated at points link(f)
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
N = y.shape[0] N = y.shape[0]
d3logpdf_dlink3 = np.zeros((N,1)) D = link_f.shape[1]
d3logpdf_dlink3 = np.zeros((N,D))
return d3logpdf_dlink3 return d3logpdf_dlink3
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
@ -218,12 +216,10 @@ class Gaussian(Likelihood):
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
e = y - link_f e = y - link_f
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
N = y.shape[0] dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e)
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e)) return dlik_dsigma
return np.sum(dlik_dsigma) # Sure about this sum?
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None): def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
""" """
@ -240,7 +236,6 @@ class Gaussian(Likelihood):
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter :returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
dlik_grad_dsigma = -s_4*y + s_4*link_f dlik_grad_dsigma = -s_4*y + s_4*link_f
return dlik_grad_dsigma return dlik_grad_dsigma
@ -260,23 +255,26 @@ class Gaussian(Likelihood):
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter :returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.asarray(link_f).shape == np.asarray(y).shape
s_4 = 1.0/(self.variance**2) s_4 = 1.0/(self.variance**2)
N = y.shape[0] N = y.shape[0]
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4 D = link_f.shape[1]
d2logpdf_dlink2_dvar = np.ones((N, D))*s_4
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
return np.asarray([[dlogpdf_dvar]]) dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dtheta
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
return dlogpdf_dlink_dvar dlogpdf_dlink_dtheta[0, :, :]= self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
return dlogpdf_dlink_dtheta
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
return d2logpdf_dlink2_dvar d2logpdf_dlink2_dtheta[0, :, :] = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
return d2logpdf_dlink2_dtheta
def _mean(self, gp): def _mean(self, gp):
""" """
@ -309,18 +307,17 @@ class Gaussian(Likelihood):
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp]) Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
return Ysim.reshape(orig_shape) return Ysim.reshape(orig_shape)
def log_predictive_density(self, y_test, mu_star, var_star): def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
""" """
assumes independence assumes independence
""" """
v = var_star + self.variance v = var_star + self.variance
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
def variational_expectations(self, Y, m, v, gh_points=None): def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
lik_var = float(self.variance) lik_var = float(self.variance)
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
dF_dmu = (Y - m)/lik_var dF_dmu = (Y - m)/lik_var
dF_dv = np.ones_like(v)*(-0.5/lik_var) dF_dv = np.ones_like(v)*(-0.5/lik_var)
dF_dlik_var = np.sum(-0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)) dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
dF_dtheta = [dF_dlik_var] return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
return F, dF_dmu, dF_dv, dF_dtheta


@ -1,11 +1,11 @@
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt) # Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
import link_functions from . import link_functions
from ..util.misc import chain_1, chain_2, chain_3 from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
from scipy.integrate import quad from scipy.integrate import quad
import warnings import warnings
from ..core.parameterization import Parameterized from ..core.parameterization import Parameterized
@ -39,6 +39,15 @@ class Likelihood(Parameterized):
assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation." assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
self.gp_link = gp_link self.gp_link = gp_link
self.log_concave = False self.log_concave = False
self.not_block_really = False
def request_num_latent_functions(self, Y):
"""
The likelihood should infer how many latent functions are needed for the likelihood
Default is the number of outputs
"""
return Y.shape[1]
def _gradients(self,partial): def _gradients(self,partial):
return np.zeros(0) return np.zeros(0)
@ -69,7 +78,7 @@ class Likelihood(Parameterized):
""" """
raise NotImplementedError raise NotImplementedError
def log_predictive_density(self, y_test, mu_star, var_star): def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
""" """
Calculation of the log predictive density Calculation of the log predictive density
@ -86,17 +95,87 @@ class Likelihood(Parameterized):
assert y_test.shape==mu_star.shape assert y_test.shape==mu_star.shape
assert y_test.shape==var_star.shape assert y_test.shape==var_star.shape
assert y_test.shape[1] == 1 assert y_test.shape[1] == 1
def integral_generator(y, m, v):
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*""" flat_y_test = y_test.flatten()
def f(f_star): flat_mu_star = mu_star.flatten()
return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star)) flat_var_star = var_star.flatten()
if Y_metadata is not None:
#Need to zip individual elements of Y_metadata as well
Y_metadata_flat = {}
if Y_metadata is not None:
for key, val in Y_metadata.items():
Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1,1)
zipped_values = []
for i in range(y_test.shape[0]):
y_m = {}
for key, val in Y_metadata_flat.items():
if np.isscalar(val) or val.shape[0] == 1:
y_m[key] = val
else:
#Won't broadcast yet
y_m[key] = val[i]
zipped_values.append((flat_y_test[i], flat_mu_star[i], flat_var_star[i], y_m))
else:
#Otherwise just pass along None's
zipped_values = zip(flat_y_test, flat_mu_star, flat_var_star, [None]*y_test.shape[0])
def integral_generator(yi, mi, vi, yi_m):
"""Generate a function which can be integrated
to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(fi_star):
#exponent = np.exp(-(1./(2*vi))*np.square(mi-fi_star))
#from GPy.util.misc import safe_exp
#exponent = safe_exp(exponent)
#res = safe_exp(self.logpdf(fi_star, yi, yi_m))*exponent
#More stable in the log space
res = np.exp(self.logpdf(fi_star, yi, yi_m)
- 0.5*np.log(2*np.pi*vi)
- 0.5*np.square(fi_star-mi)/vi)
if not np.isfinite(res):
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
return res
return f return f
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())]) p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1) for yi, mi, vi, yi_m in zipped_values])
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star) p_ystar = np.array(p_ystar).reshape(-1, 1)
return np.log(p_ystar) return np.log(p_ystar)
def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
"""
Calculation of the log predictive density via sampling
.. math:
log p(y_{*}|D) = log 1/num_samples \sum^{S}_{s=1} p(y_{*}|f_{*s})
f_{*s} ~ p(f_{*}|\mu_{*}, \\sigma^{2}_{*})
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
:type mu_star: (Nx1) array
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
:type var_star: (Nx1) array
:param num_samples: num samples of p(f_{*}|mu_{*}, var_{*}) to take
:type num_samples: int
"""
assert y_test.shape==mu_star.shape
assert y_test.shape==var_star.shape
assert y_test.shape[1] == 1
#Take samples of p(f*|y)
#fi_samples = np.random.randn(num_samples)*np.sqrt(var_star) + mu_star
fi_samples = np.random.normal(mu_star, np.sqrt(var_star), size=(mu_star.shape[0], num_samples))
from scipy.misc import logsumexp
log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
return log_p_ystar
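A standalone sketch of the same sampling estimator on the Gaussian case (hypothetical values; the result should be close to the closed-form log density from the quadrature check above):

import numpy as np
from scipy.misc import logsumexp   # scipy.special.logsumexp in newer scipy

np.random.seed(1)
y_star, m, v, lik_var = 1.2, 0.4, 0.3, 0.5
num_samples = 200000

# draw f* ~ p(f*|y) = N(m, v), then average the likelihood over the samples
f_samples = np.random.normal(m, np.sqrt(v), size=num_samples)
log_p_each = -0.5*np.log(2*np.pi*lik_var) - 0.5*(y_star - f_samples)**2/lik_var
# log( (1/S) * sum_s p(y*|f*_s) )
log_p_star = -np.log(num_samples) + logsumexp(log_p_each)
print(log_p_star)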
def _moments_match_ep(self,obs,tau,v): def _moments_match_ep(self,obs,tau,v):
""" """
Calculation of moments using quadrature Calculation of moments using quadrature
@ -131,6 +210,13 @@ class Likelihood(Parameterized):
return z, mean, variance return z, mean, variance
#only compute gh points if required
__gh_points = None
def _gh_points(self, T=20):
if self.__gh_points is None:
self.__gh_points = np.polynomial.hermite.hermgauss(T)
return self.__gh_points
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None): def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
""" """
Use Gauss-Hermite Quadrature to compute Use Gauss-Hermite Quadrature to compute
@ -143,10 +229,9 @@ class Likelihood(Parameterized):
if no gh_points are passed, we construct them using default options if no gh_points are passed, we construct them using default options
""" """
#May be broken
if gh_points is None: if gh_points is None:
gh_x, gh_w = np.polynomial.hermite.hermgauss(20) gh_x, gh_w = self._gh_points()
else: else:
gh_x, gh_w = gh_points gh_x, gh_w = gh_points
@ -168,15 +253,22 @@ class Likelihood(Parameterized):
#d2logp_dx2 = np.clip(d2logp_dx2,-1e9,1e9) #d2logp_dx2 = np.clip(d2logp_dx2,-1e9,1e9)
#average over the grid to get derivatives of the Gaussian's parameters #average over the grid to get derivatives of the Gaussian's parameters
F = np.dot(logp, gh_w) #division by pi comes from fact that for each quadrature we need to scale by 1/sqrt(pi)
dF_dm = np.dot(dlogp_dx, gh_w) F = np.dot(logp, gh_w)/np.sqrt(np.pi)
dF_dv = np.dot(d2logp_dx2, gh_w)/2. dF_dm = np.dot(dlogp_dx, gh_w)/np.sqrt(np.pi)
dF_dv = np.dot(d2logp_dx2, gh_w)/np.sqrt(np.pi)
dF_dv /= 2.
if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)): if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
stop stop
if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)): if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
stop stop
if self.size:
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
dF_dtheta = np.dot(dF_dtheta, gh_w)
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
else:
dF_dtheta = None # Not yet implemented dF_dtheta = None # Not yet implemented
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta
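The 1/sqrt(pi) factor follows from the change of variables f = sqrt(2 v) x + m, which turns E_{N(f|m,v)}[g(f)] into a weighted sum over the Gauss-Hermite nodes for exp(-x^2). A standalone sketch of that rule with an arbitrary smooth test function (values hypothetical):

import numpy as np

m, v = 0.7, 0.4
gh_x, gh_w = np.polynomial.hermite.hermgauss(20)

def g(f):
    return np.cos(f)   # any smooth test function

# E_{N(f|m,v)}[g(f)] ~= (1/sqrt(pi)) * sum_i w_i g(sqrt(2 v) x_i + m)
E_gh = np.dot(g(np.sqrt(2.*v)*gh_x + m), gh_w)/np.sqrt(np.pi)

# Monte Carlo reference
np.random.seed(2)
E_mc = np.mean(g(np.random.randn(500000)*np.sqrt(v) + m))
print(E_gh, E_mc)   # should agree to a few decimal places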
@ -189,28 +281,35 @@ class Likelihood(Parameterized):
""" """
#conditional_mean: the expected value of y given some f, under this likelihood #conditional_mean: the expected value of y given some f, under this likelihood
fmin = -np.inf
fmax = np.inf
def int_mean(f,m,v): def int_mean(f,m,v):
p = np.exp(-(0.5/v)*np.square(f - m)) exponent = -(0.5/v)*np.square(f - m)
#If exponent is under -30 then exp(exponent) will be very small, so don't exp it!)
#If p is zero then conditional_mean will overflow #If p is zero then conditional_mean will overflow
assert v.all() > 0
p = safe_exp(exponent)
#If p is zero then conditional_variance will overflow
if p < 1e-10: if p < 1e-10:
return 0. return 0.
else: else:
return self.conditional_mean(f)*p return self.conditional_mean(f)*p
scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance)) mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
return mean return mean
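When the conditional mean is exp(f) (a log link), the quadrature above can be checked against the log-normal closed form E[exp(f)] = exp(m + v/2). A standalone sketch with hypothetical values:

import numpy as np
from scipy.integrate import quad

m, v = 0.3, 0.6

def int_mean(f):
    # conditional_mean(f) * N(f | m, v) with E[Y*|f] = exp(f)
    return np.exp(f)*np.exp(-0.5*(f - m)**2/v)/np.sqrt(2*np.pi*v)

mean_quad, _ = quad(int_mean, -np.inf, np.inf)
mean_exact = np.exp(m + 0.5*v)   # log-normal mean
print(mean_quad, mean_exact)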
def _conditional_mean(self, f): def _conditional_mean(self, f):
"""Quadrature calculation of the conditional mean: E(Y_star|f)""" """Quadrature calculation of the conditional mean: E(Y_star|f)"""
raise NotImplementedError, "implement this function to make predictions" raise NotImplementedError("implement this function to make predictions")
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None): def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
""" """
Approximation to the predictive variance: V(Y_star) Approximation to the predictive variance: V(Y_star)
The following variance decomposition is used: The following variance decomposition is used:
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) ) V(Y_star) = E( V(Y_star|f_star) ) + E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
:param mu: mean of posterior :param mu: mean of posterior
:param sigma: standard deviation of posterior :param sigma: standard deviation of posterior
@ -220,15 +319,22 @@ class Likelihood(Parameterized):
#sigma2 = sigma**2 #sigma2 = sigma**2
normalizer = np.sqrt(2*np.pi*variance) normalizer = np.sqrt(2*np.pi*variance)
fmin_v = -np.inf
fmin_m = np.inf
fmin = -np.inf
fmax = np.inf
from ..util.misc import safe_exp
# E( V(Y_star|f_star) ) # E( V(Y_star|f_star) )
def int_var(f,m,v): def int_var(f,m,v):
p = np.exp(-(0.5/v)*np.square(f - m)) exponent = -(0.5/v)*np.square(f - m)
p = safe_exp(exponent)
#If p is zero then conditional_variance will overflow #If p is zero then conditional_variance will overflow
if p < 1e-10: if p < 1e-10:
return 0. return 0.
else: else:
return self.conditional_variance(f)*p return self.conditional_variance(f)*p
scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)] scaled_exp_variance = [quad(int_var, fmin_v, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2 #V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
@ -240,14 +346,15 @@ class Likelihood(Parameterized):
#E( E(Y_star|f_star)**2 ) #E( E(Y_star|f_star)**2 )
def int_pred_mean_sq(f,m,v,predictive_mean_sq): def int_pred_mean_sq(f,m,v,predictive_mean_sq):
p = np.exp(-(0.5/v)*np.square(f - m)) exponent = -(0.5/v)*np.square(f - m)
p = np.exp(exponent)
#If p is zero then conditional_mean**2 will overflow #If p is zero then conditional_mean**2 will overflow
if p < 1e-10: if p < 1e-10:
return 0. return 0.
else: else:
return self.conditional_mean(f)**2*p return self.conditional_mean(f)**2*p
scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)] scaled_exp_exp2 = [quad(int_pred_mean_sq, fmin_m, fmax,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
var_exp = exp_exp2 - predictive_mean_sq var_exp = exp_exp2 - predictive_mean_sq
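The quantity assembled here is the law of total variance, V(Y*) = E[V(Y*|f*)] + E[E(Y*|f*)^2] - E[E(Y*|f*)]^2. A standalone Monte Carlo sketch for a Poisson observation model with log link, where E(Y*|f*) = V(Y*|f*) = exp(f*) (values hypothetical):

import numpy as np

np.random.seed(3)
m, v = 0.2, 0.5
f = np.random.randn(1000000)*np.sqrt(v) + m
y = np.random.poisson(np.exp(f))

# direct estimate of V(Y*)
var_direct = np.var(y)
# E[ V(Y*|f*) ] + V[ E(Y*|f*) ] with E(Y*|f*) = V(Y*|f*) = exp(f*)
var_decomposed = np.mean(np.exp(f)) + np.var(np.exp(f))
print(var_direct, var_decomposed)   # the two estimates should agree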
@ -295,9 +402,19 @@ class Likelihood(Parameterized):
:returns: likelihood evaluated for this point :returns: likelihood evaluated for this point
:rtype: float :rtype: float
""" """
if isinstance(self.gp_link, link_functions.Identity):
return self.pdf_link(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata) return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
def logpdf_sum(self, f, y, Y_metadata=None):
"""
Convenience function that can overridden for functions where this could
be computed more efficiently
"""
return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata))
def logpdf(self, f, y, Y_metadata=None): def logpdf(self, f, y, Y_metadata=None):
""" """
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
@ -313,6 +430,9 @@ class Likelihood(Parameterized):
:returns: log likelihood evaluated for this point :returns: log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
if isinstance(self.gp_link, link_functions.Identity):
return self.logpdf_link(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata) return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
@ -332,11 +452,15 @@ class Likelihood(Parameterized):
:returns: derivative of log likelihood evaluated for this point :returns: derivative of log likelihood evaluated for this point
:rtype: 1xN array :rtype: 1xN array
""" """
if isinstance(self.gp_link, link_functions.Identity):
return self.dlogpdf_dlink(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
return chain_1(dlogpdf_dlink, dlink_df) return chain_1(dlogpdf_dlink, dlink_df)
@blockify_hessian
def d2logpdf_df2(self, f, y, Y_metadata=None): def d2logpdf_df2(self, f, y, Y_metadata=None):
""" """
Evaluates the link function link(f) then computes the second derivative of log likelihood using it Evaluates the link function link(f) then computes the second derivative of log likelihood using it
@ -353,13 +477,18 @@ class Likelihood(Parameterized):
:returns: second derivative of log likelihood evaluated for this point (diagonal only) :returns: second derivative of log likelihood evaluated for this point (diagonal only)
:rtype: 1xN array :rtype: 1xN array
""" """
if isinstance(self.gp_link, link_functions.Identity):
d2logpdf_df2 = self.d2logpdf_dlink2(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2) d2logpdf_df2 = chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
return d2logpdf_df2
@blockify_third
def d3logpdf_df3(self, f, y, Y_metadata=None): def d3logpdf_df3(self, f, y, Y_metadata=None):
""" """
Evaluates the link function link(f) then computes the third derivative of log likelihood using it Evaluates the link function link(f) then computes the third derivative of log likelihood using it
@ -376,6 +505,9 @@ class Likelihood(Parameterized):
:returns: third derivative of log likelihood evaluated for this point :returns: third derivative of log likelihood evaluated for this point
:rtype: float :rtype: float
""" """
if isinstance(self.gp_link, link_functions.Identity):
d3logpdf_df3 = self.d3logpdf_dlink3(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata) d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
@ -383,46 +515,75 @@ class Likelihood(Parameterized):
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
d3link_df3 = self.gp_link.d3transf_df3(f) d3link_df3 = self.gp_link.d3transf_df3(f)
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3) d3logpdf_df3 = chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
return d3logpdf_df3
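chain_1/chain_2/chain_3 apply the chain rule for the first three derivatives of log p(y | g(f)) with respect to f, e.g. d^2/df^2 = L''(g) g'^2 + L'(g) g''. A standalone sketch of the first two identities, re-implemented here for illustration and verified by finite differences on a toy pair of functions:

import numpy as np

def chain_1(dL_dg, dg_df):
    return dL_dg*dg_df

def chain_2(d2L_dg2, dg_df, dL_dg, d2g_df2):
    return d2L_dg2*dg_df**2 + dL_dg*d2g_df2

# toy example: L(g) = -0.5*g**2 with g(f) = exp(f)
f, h = 0.3, 1e-4
g = np.exp(f)
dL_dg, d2L_dg2 = -g, -1.0
dg_df, d2g_df2 = g, g

L = lambda x: -0.5*np.exp(x)**2
d1_fd = (L(f + h) - L(f - h))/(2*h)
d2_fd = (L(f + h) - 2*L(f) + L(f - h))/h**2
print(chain_1(dL_dg, dg_df), d1_fd)                     # both ~ -exp(2f)
print(chain_2(d2L_dg2, dg_df, dL_dg, d2g_df2), d2_fd)   # both ~ -2*exp(2f)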
def dlogpdf_dtheta(self, f, y, Y_metadata=None): def dlogpdf_dtheta(self, f, y, Y_metadata=None):
""" """
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
if self.not_block_really:
raise NotImplementedError("Need to make a decorator for this!")
if isinstance(self.gp_link, link_functions.Identity):
return self.dlogpdf_link_dtheta(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata) return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
else: else:
# There are no parameters so return an empty array for derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([1, 0]) return np.zeros((0, f.shape[0], f.shape[1]))
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None): def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
""" """
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
if self.not_block_really:
raise NotImplementedError("Need to make a decorator for this!")
if isinstance(self.gp_link, link_functions.Identity):
return self.dlogpdf_dlink_dtheta(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
dlogpdf_df_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
#Chain each parameter of the likelihood separately
for p in range(self.size):
dlogpdf_df_dtheta[p, :, :] = chain_1(dlogpdf_dlink_dtheta[p,:,:], dlink_df)
return dlogpdf_df_dtheta
#return chain_1(dlogpdf_dlink_dtheta, dlink_df)
else: else:
# There are no parameters so return an empty array for derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0]) return np.zeros((0, f.shape[0], f.shape[1]))
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None): def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
""" """
TODO: Doc strings TODO: Doc strings
""" """
if self.size > 0: if self.size > 0:
if self.not_block_really:
raise NotImplementedError("Need to make a decorator for this!")
if isinstance(self.gp_link, link_functions.Identity):
return self.d2logpdf_dlink2_dtheta(f, y, Y_metadata=Y_metadata)
else:
inv_link_f = self.gp_link.transf(f) inv_link_f = self.gp_link.transf(f)
dlink_df = self.gp_link.dtransf_df(f) dlink_df = self.gp_link.dtransf_df(f)
d2link_df2 = self.gp_link.d2transf_df2(f) d2link_df2 = self.gp_link.d2transf_df2(f)
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
d2logpdf_df2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
#Chain each parameter of the likelihood separately
for p in range(self.size):
d2logpdf_df2_dtheta[p, :, :] = chain_2(d2logpdf_dlink2_dtheta[p,:,:], dlink_df, dlogpdf_dlink_dtheta[p,:,:], d2link_df2)
return d2logpdf_df2_dtheta
#return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
else: else:
# There are no parameters so return an empty array for derivatives # There are no parameters so return an empty array for derivatives
return np.zeros([f.shape[0], 0]) return np.zeros((0, f.shape[0], f.shape[1]))
def _laplace_gradients(self, f, y, Y_metadata=None): def _laplace_gradients(self, f, y, Y_metadata=None):
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata) dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
@ -431,9 +592,9 @@ class Likelihood(Parameterized):
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names' #Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize # ensure we have gradients for every parameter we want to optimize
assert len(dlogpdf_dtheta) == self.size #1 x num_param array assert dlogpdf_dtheta.shape[0] == self.size #num_param array x f, d
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix assert dlogpdf_df_dtheta.shape[0] == self.size #num_param x f x d x matrix or just num_param x f
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix assert d2logpdf_df2_dtheta.shape[0] == self.size #num_param x f matrix or num_param x f x d x matrix, num_param x f x f or num_param x f x f x d
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
@ -454,19 +615,98 @@ class Likelihood(Parameterized):
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None): def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
#compute the quantiles by sampling!!! #compute the quantiles by sampling!!!
N_samp = 1000 N_samp = 500
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
#ss_f = s.flatten() #ss_f = s.flatten()
#ss_y = self.samples(ss_f, Y_metadata) #ss_y = self.samples(ss_f, Y_metadata)
#ss_y = self.samples(s, Y_metadata, samples=100)
ss_y = self.samples(s, Y_metadata) ss_y = self.samples(s, Y_metadata)
#ss_y = ss_y.reshape(mu.shape[0], N_samp) #ss_y = ss_y.reshape(mu.shape[0], N_samp)
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles] return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
def samples(self, gp, Y_metadata=None): def samples(self, gp, Y_metadata=None, samples=1):
""" """
Returns a set of samples of observations based on a given value of the latent variable. Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable :param gp: latent variable
:param samples: number of samples to take for each f location
""" """
raise NotImplementedError raise NotImplementedError("""May be possible to use MCMC with user-tuning, see
MCMC_pdf_samples in likelihood.py and write samples function
using this, beware this is a simple implementation
of Metropolis and will not work well for all likelihoods""")
def MCMC_pdf_samples(self, fNew, num_samples=1000, starting_loc=None, stepsize=0.1, burn_in=1000, Y_metadata=None):
"""
Simple implementation of Metropolis sampling algorithm
Will run a parallel chain for each input dimension (treats each f independently)
Thus assumes f*_1 independent of f*_2 etc.
:param num_samples: Number of samples to take
:param fNew: f at which to sample around
:param starting_loc: Starting locations of the independent chains (usually will be conditional_mean of likelihood), often link_f
:param stepsize: Stepsize for the normal proposal distribution (will need modifying)
:param burn_in: number of samples to use for burn-in (will need modifying)
:param Y_metadata: Y_metadata for pdf
"""
print("Warning, using MCMC for sampling y*, needs to be tuned!")
if starting_loc is None:
starting_loc = fNew
from functools import partial
logpdf = partial(self.logpdf, f=fNew, Y_metadata=Y_metadata)
pdf = lambda y_star: np.exp(logpdf(y=y_star[:, None]))
#The link function of f should be a good starting point
#(i.e. the point before you corrupt it with the likelihood)
par_chains = starting_loc.shape[0]
chain_values = np.zeros((par_chains, num_samples))
chain_values[:, 0][:,None] = starting_loc
#Use same stepsize for all par_chains
stepsize = np.ones(par_chains)*stepsize
accepted = np.zeros((par_chains, num_samples+burn_in))
accept_ratio = np.zeros(num_samples+burn_in)
#Whilst burning in, only need to keep the previous lot
burnin_cache = np.zeros(par_chains)
burnin_cache[:] = starting_loc.flatten()
burning_in = True
for i in xrange(burn_in+num_samples):
next_ind = i-burn_in
if burning_in:
old_y = burnin_cache
else:
old_y = chain_values[:,next_ind-1]
old_lik = pdf(old_y)
#Propose new y from Gaussian proposal
new_y = np.random.normal(loc=old_y, scale=stepsize)
new_lik = pdf(new_y)
#Accept using Metropolis (not hastings) acceptance
#Always accepts if new_lik > old_lik
accept_probability = np.minimum(1, new_lik/old_lik)
u = np.random.uniform(0,1,par_chains)
#print "Accept prob: ", accept_probability
accepts = u < accept_probability
if burning_in:
burnin_cache[accepts] = new_y[accepts]
burnin_cache[~accepts] = old_y[~accepts]
if i == burn_in:
burning_in = False
chain_values[:,0] = burnin_cache
else:
#If it was accepted then new_y becomes the latest sample
chain_values[accepts, next_ind] = new_y[accepts]
#Otherwise use old y as the sample
chain_values[~accepts, next_ind] = old_y[~accepts]
accepted[~accepts, i] = 0
accepted[accepts, i] = 1
accept_ratio[i] = np.sum(accepted[:,i])/float(par_chains)
#Show progress
if i % int((burn_in+num_samples)*0.1) == 0:
print("{}% of samples taken ({})".format((i/int((burn_in+num_samples)*0.1)*10), i))
print("Last run accept ratio: ", accept_ratio[i])
print("Average accept ratio: ", np.mean(accept_ratio))
return chain_values
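The acceptance rule used above is plain Metropolis with a symmetric Gaussian proposal: accept y_new with probability min(1, p(y_new)/p(y_old)). A tiny standalone sketch of that rule on a known target density (values hypothetical):

import numpy as np

np.random.seed(4)
target_pdf = lambda y: np.exp(-0.5*(y - 2.0)**2)   # unnormalised N(2, 1)

num_samples, stepsize = 5000, 0.5
chain = np.zeros(num_samples)
y_old, p_old = 0.0, target_pdf(0.0)
for i in range(num_samples):
    y_new = np.random.normal(loc=y_old, scale=stepsize)
    p_new = target_pdf(y_new)
    if np.random.uniform() < min(1.0, p_new/p_old):   # Metropolis acceptance
        y_old, p_old = y_new, p_new
    chain[i] = y_old

print(np.mean(chain[1000:]), np.var(chain[1000:]))   # roughly 2 and 1 after burn-in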


@ -1,13 +1,10 @@
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt) # Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt)
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from scipy import stats from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
import scipy as sp import scipy as sp
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
_exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)
class GPTransformation(object): class GPTransformation(object):
""" """
@ -79,13 +76,10 @@ class Probit(GPTransformation):
return std_norm_pdf(f) return std_norm_pdf(f)
def d2transf_df2(self,f): def d2transf_df2(self,f):
#FIXME
return -f * std_norm_pdf(f) return -f * std_norm_pdf(f)
def d3transf_df3(self,f): def d3transf_df3(self,f):
#FIXME return (safe_square(f)-1.)*std_norm_pdf(f)
f2 = f**2
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
class Cloglog(GPTransformation): class Cloglog(GPTransformation):
@ -101,19 +95,23 @@ class Cloglog(GPTransformation):
""" """
def transf(self,f): def transf(self,f):
return 1-np.exp(-np.exp(f)) ef = safe_exp(f)
return 1-np.exp(-ef)
def dtransf_df(self,f): def dtransf_df(self,f):
return np.exp(f-np.exp(f)) ef = safe_exp(f)
return np.exp(f-ef)
def d2transf_df2(self,f): def d2transf_df2(self,f):
ef = np.exp(f) ef = safe_exp(f)
return -np.exp(f-ef)*(ef-1.) return -np.exp(f-ef)*(ef-1.)
def d3transf_df3(self,f): def d3transf_df3(self,f):
ef = np.exp(f) ef = safe_exp(f)
return np.exp(f-ef)*(1.-3*ef + ef**2) ef2 = safe_square(ef)
three_times_ef = safe_three_times(ef)
r_val = np.exp(f-ef)*(1.-three_times_ef + ef2)
return r_val
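The safe_exp/safe_square guards only clip extreme arguments; for ordinary inputs the cloglog transform and its derivatives are unchanged and easy to check by finite differences. A standalone sketch:

import numpy as np

transf = lambda f: 1. - np.exp(-np.exp(f))      # cloglog transform
dtransf = lambda f: np.exp(f - np.exp(f))       # its first derivative

f, h = 0.4, 1e-5
fd = (transf(f + h) - transf(f - h))/(2*h)      # central finite difference
print(dtransf(f), fd)                           # should agree very closely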
class Log(GPTransformation): class Log(GPTransformation):
""" """
@ -123,16 +121,16 @@ class Log(GPTransformation):
""" """
def transf(self,f): def transf(self,f):
return np.exp(np.clip(f, -_lim_val, _lim_val)) return safe_exp(f)
def dtransf_df(self,f): def dtransf_df(self,f):
return np.exp(np.clip(f, -_lim_val, _lim_val)) return safe_exp(f)
def d2transf_df2(self,f): def d2transf_df2(self,f):
return np.exp(np.clip(f, -_lim_val, _lim_val)) return safe_exp(f)
def d3transf_df3(self,f): def d3transf_df3(self,f):
return np.exp(np.clip(f, -_lim_val, _lim_val)) return safe_exp(f)
class Log_ex_1(GPTransformation): class Log_ex_1(GPTransformation):
""" """
@ -142,17 +140,20 @@ class Log_ex_1(GPTransformation):
""" """
def transf(self,f): def transf(self,f):
return np.log(1.+np.exp(f)) return np.log1p(safe_exp(f))
def dtransf_df(self,f): def dtransf_df(self,f):
return np.exp(f)/(1.+np.exp(f)) ef = safe_exp(f)
return ef/(1.+ef)
def d2transf_df2(self,f): def d2transf_df2(self,f):
aux = np.exp(f)/(1.+np.exp(f)) ef = safe_exp(f)
aux = ef/(1.+ef)
return aux*(1.-aux) return aux*(1.-aux)
def d3transf_df3(self,f): def d3transf_df3(self,f):
aux = np.exp(f)/(1.+np.exp(f)) ef = safe_exp(f)
aux = ef/(1.+ef)
daux_df = aux*(1.-aux) daux_df = aux*(1.-aux)
return daux_df - (2.*aux*daux_df) return daux_df - (2.*aux*daux_df)
@ -160,21 +161,24 @@ class Reciprocal(GPTransformation):
def transf(self,f): def transf(self,f):
return 1./f return 1./f
def dtransf_df(self,f): def dtransf_df(self, f):
return -1./(f**2) f2 = safe_square(f)
return -1./f2
def d2transf_df2(self,f): def d2transf_df2(self, f):
return 2./(f**3) f3 = safe_cube(f)
return 2./f3
def d3transf_df3(self,f): def d3transf_df3(self,f):
return -6./(f**4) f4 = safe_quad(f)
return -6./f4
class Heaviside(GPTransformation): class Heaviside(GPTransformation):
""" """
.. math:: .. math::
g(f) = I_{x \\in A} g(f) = I_{x \\geq 0}
""" """
def transf(self,f): def transf(self,f):
@ -182,7 +186,7 @@ class Heaviside(GPTransformation):
return np.where(f>0, 1, 0) return np.where(f>0, 1, 0)
def dtransf_df(self,f): def dtransf_df(self,f):
raise NotImplementedError, "This function is not differentiable!" raise NotImplementedError("This function is not differentiable!")
def d2transf_df2(self,f): def d2transf_df2(self,f):
raise NotImplementedError, "This function is not differentiable!" raise NotImplementedError("This function is not differentiable!")


@ -3,9 +3,9 @@
import numpy as np import numpy as np
from scipy import stats, special from scipy import stats, special
import link_functions from . import link_functions
from likelihood import Likelihood from .likelihood import Likelihood
from gaussian import Gaussian from .gaussian import Gaussian
from ..core.parameterization import Param from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp from ..core.parameterization.transformations import Logexp
from ..core.parameterization import Parameterized from ..core.parameterization import Parameterized


@ -5,8 +5,8 @@ from __future__ import division
import numpy as np import numpy as np
from scipy import stats,special from scipy import stats,special
import scipy as sp import scipy as sp
import link_functions from . import link_functions
from likelihood import Likelihood from .likelihood import Likelihood
class Poisson(Likelihood): class Poisson(Likelihood):
""" """
@ -122,7 +122,6 @@ class Poisson(Likelihood):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
d3lik_dlink3 = 2*y/(link_f)**3 d3lik_dlink3 = 2*y/(link_f)**3
return d3lik_dlink3 return d3lik_dlink3


@ -4,12 +4,13 @@
import numpy as np import numpy as np
from scipy import stats, special from scipy import stats, special
import scipy as sp import scipy as sp
import link_functions from . import link_functions
from scipy import stats, integrate from scipy import stats, integrate
from scipy.special import gammaln, gamma from scipy.special import gammaln, gamma
from likelihood import Likelihood from .likelihood import Likelihood
from ..core.parameterization import Param from ..core.parameterization import Param
from ..core.parameterization.transformations import Logexp from ..core.parameterization.transformations import Logexp
from scipy.special import psi as digamma
class StudentT(Likelihood): class StudentT(Likelihood):
""" """
@ -28,16 +29,13 @@ class StudentT(Likelihood):
super(StudentT, self).__init__(gp_link, name='Student_T') super(StudentT, self).__init__(gp_link, name='Student_T')
# sigma2 is not a noise parameter, it is a squared scale. # sigma2 is not a noise parameter, it is a squared scale.
self.sigma2 = Param('t_scale2', float(sigma2), Logexp()) self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
self.v = Param('deg_free', float(deg_free)) self.v = Param('deg_free', float(deg_free), Logexp())
self.link_parameter(self.sigma2) self.link_parameter(self.sigma2)
self.link_parameter(self.v) self.link_parameter(self.v)
self.v.constrain_fixed() #self.v.constrain_fixed()
self.log_concave = False self.log_concave = False
def parameters_changed(self):
self.variance = (self.v / float(self.v - 2)) * self.sigma2
def update_gradients(self, grads): def update_gradients(self, grads):
""" """
Pull out the gradients, be careful as the order must match the order Pull out the gradients, be careful as the order must match the order
@ -86,7 +84,6 @@ class StudentT(Likelihood):
:rtype: float :rtype: float
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
#FIXME: #FIXME:
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?! #Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
@ -97,7 +94,7 @@ class StudentT(Likelihood):
- 0.5*np.log(self.sigma2 * self.v * np.pi) - 0.5*np.log(self.sigma2 * self.v * np.pi)
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2)) - 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
) )
return np.sum(objective) return objective
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None): def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
""" """
@ -115,7 +112,6 @@ class StudentT(Likelihood):
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2)) grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
return grad return grad
@ -141,7 +137,6 @@ class StudentT(Likelihood):
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i)) (the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2) hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
return hess return hess
@ -161,7 +156,6 @@ class StudentT(Likelihood):
:returns: third derivative of likelihood evaluated at points f :returns: third derivative of likelihood evaluated at points f
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) / d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
((e**2 + self.sigma2*self.v)**3) ((e**2 + self.sigma2*self.v)**3)
@ -183,10 +177,10 @@ class StudentT(Likelihood):
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: float :rtype: float
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2)) e2 = np.square(e)
return np.sum(dlogpdf_dvar) dlogpdf_dvar = self.v*(e2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e2))
return dlogpdf_dvar
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None): def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
""" """
@ -203,7 +197,6 @@ class StudentT(Likelihood):
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter :returns: derivative of likelihood evaluated at points f w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2) dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
return dlogpdf_dlink_dvar return dlogpdf_dlink_dvar
@ -223,27 +216,53 @@ class StudentT(Likelihood):
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter :returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
:rtype: Nx1 array :rtype: Nx1 array
""" """
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
e = y - inv_link_f e = y - inv_link_f
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2))) d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
/ ((self.sigma2*self.v + (e**2))**3) / ((self.sigma2*self.v + (e**2))**3)
) )
return d2logpdf_dlink2_dvar return d2logpdf_dlink2_dvar
def dlogpdf_link_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
dlogpdf_dv = 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
dlogpdf_dv += 0.5*(df+1)*e2/(df*(e2 + s2*df))
dlogpdf_dv -= 0.5*np.log1p(e2/(s2*df))
return dlogpdf_dv
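This degrees-of-freedom gradient can be verified against a finite difference of the Student-t log density. A standalone sketch using the same expression (scipy only, hypothetical values):

import numpy as np
from scipy.special import gammaln, digamma

def studentt_logpdf(e, s2, df):
    # log p(y|f) with e = y - f, squared scale s2 and degrees of freedom df
    return (gammaln(0.5*(df + 1)) - gammaln(0.5*df)
            - 0.5*np.log(s2*df*np.pi)
            - 0.5*(df + 1)*np.log(1. + e**2/(s2*df)))

def dlogpdf_dv(e, s2, df):
    e2 = e**2
    return (0.5*digamma(0.5*(df + 1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
            + 0.5*(df + 1)*e2/(df*(e2 + s2*df))
            - 0.5*np.log1p(e2/(s2*df)))

e, s2, df, h = 0.7, 2.0, 5.0, 1e-5
fd = (studentt_logpdf(e, s2, df + h) - studentt_logpdf(e, s2, df - h))/(2*h)
print(dlogpdf_dv(e, s2, df), fd)   # should agree very closely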
def dlogpdf_dlink_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
dlogpdf_df_dv = e*(e2 - self.sigma2)/(e2 + s2*df)**2
return dlogpdf_df_dv
def d2logpdf_dlink2_dv(self, inv_link_f, y, Y_metadata=None):
e = y - inv_link_f
e2 = np.square(e)
df = float(self.v[:])
s2 = float(self.sigma2[:])
e2_s2v = e**2 + s2*df
d2logpdf_df2_dv = (-s2*(df+1) + e2 - s2*df)/e2_s2v**2 - 2*s2*(df+1)*(e2 - s2*df)/e2_s2v**3
return d2logpdf_df2_dv
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None): def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet dlogpdf_dv = self.dlogpdf_link_dv(f, y, Y_metadata=Y_metadata)
return np.hstack((dlogpdf_dvar, dlogpdf_dv)) return np.array((dlogpdf_dvar, dlogpdf_dv))
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None): def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata) dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet dlogpdf_dlink_dv = self.dlogpdf_dlink_dv(f, y, Y_metadata=Y_metadata)
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv)) return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None): def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata) d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet d2logpdf_dlink2_dv = self.d2logpdf_dlink2_dv(f, y, Y_metadata=Y_metadata)
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv)) return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
def predictive_mean(self, mu, sigma, Y_metadata=None): def predictive_mean(self, mu, sigma, Y_metadata=None):
# The comment here confuses mean and median. # The comment here confuses mean and median.


@ -1,7 +1,10 @@
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt). # Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernel import Kernel from .kernel import Kernel
from linear import Linear from .linear import Linear
from mlp import MLP from .mlp import MLP
#from rbf import RBF from .additive import Additive
from .compound import Compound
from .constant import Constant


@ -2,8 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..core.mapping import Mapping from ..core import Mapping
import GPy
class Additive(Mapping): class Additive(Mapping):
""" """
@ -17,45 +16,23 @@ class Additive(Mapping):
:type mapping1: GPy.mappings.Mapping :type mapping1: GPy.mappings.Mapping
:param mapping2: second mapping to add together. :param mapping2: second mapping to add together.
:type mapping2: GPy.mappings.Mapping :type mapping2: GPy.mappings.Mapping
:param tensor: whether or not to use the tensor product of input spaces
:type tensor: bool
""" """
def __init__(self, mapping1, mapping2, tensor=False): def __init__(self, mapping1, mapping2):
if tensor:
input_dim = mapping1.input_dim + mapping2.input_dim
else:
input_dim = mapping1.input_dim
assert(mapping1.input_dim==mapping2.input_dim) assert(mapping1.input_dim==mapping2.input_dim)
assert(mapping1.output_dim==mapping2.output_dim) assert(mapping1.output_dim==mapping2.output_dim)
output_dim = mapping1.output_dim input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim) Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.mapping1 = mapping1 self.mapping1 = mapping1
self.mapping2 = mapping2 self.mapping2 = mapping2
self.num_params = self.mapping1.num_params + self.mapping2.num_params
self.name = self.mapping1.name + '+' + self.mapping2.name
def _get_param_names(self):
return self.mapping1._get_param_names + self.mapping2._get_param_names
def _get_params(self):
return np.hstack((self.mapping1._get_params(), self.mapping2._get_params()))
def _set_params(self, x):
self.mapping1._set_params(x[:self.mapping1.num_params])
self.mapping2._set_params(x[self.mapping1.num_params:])
def randomize(self):
self.mapping1._randomize()
self.mapping2._randomize()
def f(self, X): def f(self, X):
return self.mapping1.f(X) + self.mapping2.f(X) return self.mapping1.f(X) + self.mapping2.f(X)
def df_dtheta(self, dL_df, X): def update_gradients(self, dL_dF, X):
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T self.mapping1.update_gradients(dL_dF, X)
self._df_dbias = (dL_df.sum(0)) self.mapping2.update_gradients(dL_dF, X)
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def df_dX(self, dL_df, X): def gradients_X(self, dL_dF, X):
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X) return self.mapping1.gradients_X(dL_dF, X) + self.mapping2.gradients_X(dL_dF, X)

GPy/mappings/compound.py Normal file

@ -0,0 +1,39 @@
# Copyright (c) 2015, James Hensman and Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from ..core import Mapping
class Compound(Mapping):
"""
Mapping based on passing one mapping through another
.. math::
f(\mathbf{x}) = f_2(f_1(\mathbf{x}))
:param mapping1: first mapping
:type mapping1: GPy.mappings.Mapping
:param mapping2: second mapping
:type mapping2: GPy.mappings.Mapping
"""
def __init__(self, mapping1, mapping2):
assert(mapping1.output_dim==mapping2.input_dim)
input_dim, output_dim = mapping1.input_dim, mapping2.output_dim
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
self.mapping1 = mapping1
self.mapping2 = mapping2
self.link_parameters(self.mapping1, self.mapping2)
def f(self, X):
return self.mapping2.f(self.mapping1.f(X))
def update_gradients(self, dL_dF, X):
hidden = self.mapping1.f(X)
self.mapping2.update_gradients(dL_dF, hidden)
self.mapping1.update_gradients(self.mapping2.gradients_X(dL_dF, hidden), X)
def gradients_X(self, dL_dF, X):
hidden = self.mapping1.f(X)
return self.mapping1.gradients_X(self.mapping2.gradients_X(dL_dF, hidden), X)
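Compound back-propagates through the two mappings with the chain rule: gradients of the outer mapping's inputs become the dL_dF passed to the inner one. A standalone numpy sketch mirroring f(x) = f_2(f_1(x)) for two linear maps, following the same update_gradients / gradients_X pattern (shapes and values hypothetical):

import numpy as np

np.random.seed(5)
X = np.random.randn(10, 3)
A1 = np.random.randn(3, 4)   # inner linear map, 3 -> 4
A2 = np.random.randn(4, 2)   # outer linear map, 4 -> 2

F = X.dot(A1).dot(A2)        # f2(f1(X))
dL_dF = np.ones_like(F)      # pretend upstream gradient

# gradients, mirroring Compound.update_gradients / gradients_X
hidden = X.dot(A1)
dL_dA2 = hidden.T.dot(dL_dF)
dL_dhidden = dL_dF.dot(A2.T)
dL_dA1 = X.T.dot(dL_dhidden)
dL_dX = dL_dhidden.dot(A1.T)
print(dL_dX.shape, dL_dA1.shape, dL_dA2.shape)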

GPy/mappings/constant.py Normal file

@ -0,0 +1,40 @@
# Copyright (c) 2015, James Hensman, Alan Saul
import numpy as np
from ..core.mapping import Mapping
from ..core.parameterization import Param
class Constant(Mapping):
"""
A constant mapping.
.. math::
F(\mathbf{x}) = c
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output.
:type output_dim: int
:param value: the value of this constant mapping
"""
def __init__(self, input_dim, output_dim, value=0., name='constmap'):
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
value = np.atleast_1d(value)
if not len(value.shape) ==1:
raise ValueError("bad constant values: pass a float or flat vector")
elif value.size==1:
value = np.ones(self.output_dim)*value
self.C = Param('C', value)
self.link_parameter(self.C)
def f(self, X):
return np.tile(self.C.values[None,:], (X.shape[0], 1))
def update_gradients(self, dL_dF, X):
self.C.gradient = dL_dF.sum(0)
def gradients_X(self, dL_dF, X):
return np.zeros_like(X)

GPy/mappings/identity.py Normal file

@ -0,0 +1,26 @@
# Copyright (c) 2015, James Hensman
from ..core.mapping import Mapping
from ..core import Param
class Identity(Mapping):
"""
A mapping that does nothing!
"""
def __init__(self, input_dim, output_dim, name='identity'):
Mapping.__init__(self, input_dim, output_dim, name)
def f(self, X):
return X
def update_gradients(self, dL_dF, X):
pass
def gradients_X(self, dL_dF, X):
return dL_dF


@ -1,9 +1,10 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt). # Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..core.mapping import Mapping from ..core.mapping import Mapping
import GPy from ..core import Param
class Kernel(Mapping): class Kernel(Mapping):
""" """
@ -11,50 +12,41 @@ class Kernel(Mapping):
.. math:: .. math::
f(\mathbf{x}*) = \mathbf{A}\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b} f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})
:param X: input observations containing :math:`\mathbf{X}` or for multiple outputs
:type X: ndarray
.. math::
f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_i, \mathbf{x})
:param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output. :param output_dim: dimension of output.
:type output_dim: int :type output_dim: int
:param Z: input observations containing :math:`\mathbf{Z}`
:type Z: ndarray
:param kernel: a GPy kernel, defaults to GPy.kern.RBF :param kernel: a GPy kernel, defaults to GPy.kern.RBF
:type kernel: GPy.kern.kern :type kernel: GPy.kern.kern
""" """
def __init__(self, X, output_dim=1, kernel=None): def __init__(self, input_dim, output_dim, Z, kernel, name='kernmap'):
Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim) Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
if kernel is None:
kernel = GPy.kern.RBF(self.input_dim)
self.kern = kernel self.kern = kernel
self.X = X self.Z = Z
self.num_data = X.shape[0] self.num_bases, Zdim = Z.shape
self.num_params = self.output_dim*(self.num_data + 1) assert Zdim == self.input_dim
self.A = np.array((self.num_data, self.output_dim)) self.A = Param('A', np.random.randn(self.num_bases, self.output_dim))
self.bias = np.array(self.output_dim) self.link_parameter(self.A)
self.randomize()
self.name = 'kernel'
def _get_param_names(self):
return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]
def _get_params(self):
return np.hstack((self.A.flatten(), self.bias))
def _set_params(self, x):
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
self.bias = x[self.num_data*self.output_dim:].copy()
def randomize(self):
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)
def f(self, X): def f(self, X):
return np.dot(self.kern.K(X, self.X),self.A) + self.bias return np.dot(self.kern.K(X, self.Z), self.A)
def df_dtheta(self, dL_df, X): def update_gradients(self, dL_dF, X):
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T self.kern.update_gradients_full(np.dot(dL_dF, self.A.T), X, self.Z)
self._df_dbias = (dL_df.sum(0)) self.A.gradient = np.dot( self.kern.K(self.Z, X), dL_dF)
return np.hstack((self._df_dA.flatten(), self._df_dbias))
def df_dX(self, dL_df, X): def gradients_X(self, dL_dF, X):
return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X) return self.kern.gradients_X(np.dot(dL_dF, self.A.T), X, self.Z)
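The rewritten Kernel mapping is a linear model on kernel basis functions, f(x) = sum_j A_j k(z_j, x), so the gradient with respect to A is K(Z, X) dL_dF. A standalone numpy sketch with a hand-rolled RBF kernel (illustrative only, not GPy's kern module):

import numpy as np

def rbf(X1, X2, lengthscale=1.0):
    # squared-exponential kernel matrix between two sets of points
    d2 = ((X1[:, None, :] - X2[None, :, :])**2).sum(-1)
    return np.exp(-0.5*d2/lengthscale**2)

np.random.seed(6)
X = np.random.randn(20, 2)      # inputs
Z = np.random.randn(5, 2)       # basis locations
A = np.random.randn(5, 3)       # weights, num_bases x output_dim

F = rbf(X, Z).dot(A)            # f(X), shape 20 x 3
dL_dF = np.ones_like(F)
A_grad = rbf(Z, X).dot(dL_dF)   # mirrors A.gradient = K(Z, X) dL_dF
print(F.shape, A_grad.shape)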


@ -1,43 +1,39 @@
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt). # Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
# Copyright (c) 2015, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..core.mapping import Bijective_mapping from ..core.mapping import Mapping
from ..core.parameterization import Param from ..core.parameterization import Param
class Linear(Bijective_mapping): class Linear(Mapping):
""" """
Mapping based on a linear model. A Linear mapping.
.. math:: .. math::
f(\mathbf{x}*) = \mathbf{W}\mathbf{x}^* + \mathbf{b} F(\mathbf{x}) = \mathbf{A}\mathbf{x}
:param X: input observations
:type X: ndarray :param input_dim: dimension of input.
:type input_dim: int
:param output_dim: dimension of output. :param output_dim: dimension of output.
:type output_dim: int :type output_dim: int
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
:type kernel: GPy.kern.kern
""" """
def __init__(self, input_dim=1, output_dim=1, name='linear'): def __init__(self, input_dim, output_dim, name='linmap'):
Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name) Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
self.W = Param('W',np.array((self.input_dim, self.output_dim))) self.A = Param('A', np.random.randn(self.input_dim, self.output_dim))
self.bias = Param('bias',np.array(self.output_dim)) self.link_parameter(self.A)
self.link_parameters(self.W, self.bias)
def f(self, X): def f(self, X):
return np.dot(X,self.W) + self.bias return np.dot(X, self.A)
def g(self, f): def update_gradients(self, dL_dF, X):
V = np.linalg.solve(np.dot(self.W.T, self.W), W.T) self.A.gradient = np.dot( X.T, dL_dF)
return np.dot(f-self.bias, V)
def df_dtheta(self, dL_df, X): def gradients_X(self, dL_dF, X):
df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T return np.dot(dL_dF, self.A.T)
df_dbias = (dL_df.sum(0))
return np.hstack((df_dW.flatten(), df_dbias))
def dL_dX(self, partial, X):
"""The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
return (partial[:, None, :]*self.W[None, :, :]).sum(2)
