Mirror of https://github.com/SheffieldML/GPy.git, synced 2026-05-10 20:42:39 +02:00

Commit c05540dc31: Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

170 changed files with 30768 additions and 2183 deletions
@@ -18,7 +18,8 @@ before_install:
 install:
 - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
-- pip install .
+#- pip install .
+- python setup.py build_ext --inplace
 #--use-mirrors
 #
 # command to run tests, e.g. python setup.py test
@@ -3,23 +3,23 @@
 import warnings
 warnings.filterwarnings("ignore", category=DeprecationWarning)

-import core
-from core.parameterization import transformations, priors
+from . import core
+from .core.parameterization import transformations, priors
 constraints = transformations
-import models
-import mappings
-import inference
-import util
-import examples
-import likelihoods
-import testing
+from . import models
+from . import mappings
+from . import inference
+from . import util
+from . import examples
+from . import likelihoods
+from . import testing
 from numpy.testing import Tester
-import kern
-import plotting
+from . import kern
+from . import plotting

 # Direct imports for convenience:
-from core import Model
-from core.parameterization import Param, Parameterized, ObsAr
+from .core import Model
+from .core.parameterization import Param, Parameterized, ObsAr

 #@nottest
 try:
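The import renames above follow Python 3's rule that intra-package imports must be explicit; the bare "import core" form is an implicit relative import that only Python 2 accepts. A minimal sketch of the difference, using a hypothetical package "pkg" rather than GPy itself:

    # pkg/__init__.py
    # Python 2 only -- implicit relative import, an ImportError on Python 3:
    #   import core
    # Works on both Python 2 and Python 3:
    from . import core
    from .core import Model   # hypothetical submodule and class, mirroring the change above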
@@ -1,12 +1,12 @@
 # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from model import *
-from parameterization.parameterized import adjust_name_for_printing, Parameterizable
-from parameterization.param import Param, ParamConcatenation
-from parameterization.observable_array import ObsAr
+from .model import *
+from .parameterization.parameterized import adjust_name_for_printing, Parameterizable
+from .parameterization.param import Param, ParamConcatenation
+from .parameterization.observable_array import ObsAr

-from gp import GP
-from svgp import SVGP
-from sparse_gp import SparseGP
-from mapping import *
+from .gp import GP
+from .svgp import SVGP
+from .sparse_gp import SparseGP
+from .mapping import *
GPy/core/gp.py (120 changed lines)
@@ -4,13 +4,15 @@
 import numpy as np
 import sys
 from .. import kern
-from model import Model
-from parameterization import ObsAr
+from .model import Model
+from .parameterization import ObsAr
+from .mapping import Mapping
 from .. import likelihoods
 from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
-from parameterization.variational import VariationalPosterior
+from .parameterization.variational import VariationalPosterior

 import logging
+import warnings
 from GPy.util.normalizer import MeanNorm
 logger = logging.getLogger("GP")

@@ -34,7 +36,7 @@ class GP(Model):

 """
-def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
+def __init__(self, X, Y, kernel, likelihood, mean_function=None, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
 super(GP, self).__init__(name)

 assert X.ndim == 2
@@ -62,10 +64,14 @@ class GP(Model):
 self.Y = ObsAr(Y)
 self.Y_normalized = self.Y

-assert Y.shape[0] == self.num_data
+if Y.shape[0] != self.num_data:
+    #There can be cases where we want inputs than outputs, for example if we have multiple latent
+    #function values
+    warnings.warn("There are more rows in your input data X, \
+                   than in your output data Y, be VERY sure this is what you want")
 _, self.output_dim = self.Y.shape

 #TODO: check the type of this is okay?
 assert ((Y_metadata is None) or isinstance(Y_metadata, dict))
 self.Y_metadata = Y_metadata

 assert isinstance(kernel, kern.Kern)
@@ -75,6 +81,15 @@ class GP(Model):
 assert isinstance(likelihood, likelihoods.Likelihood)
 self.likelihood = likelihood

+#handle the mean function
+self.mean_function = mean_function
+if mean_function is not None:
+    assert isinstance(self.mean_function, Mapping)
+    assert mean_function.input_dim == self.input_dim
+    assert mean_function.output_dim == self.output_dim
+    self.link_parameter(mean_function)
+
 #find a sensible inference method
 logger.info("initializing inference method")
 if inference_method is None:
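For illustration, a minimal sketch of how the new mean_function argument might be used. The data, the Linear mapping and the Gaussian likelihood below are assumptions for the example, not part of this commit; the diff only requires mean_function to be a Mapping whose input/output dimensions match the model.

    import numpy as np
    import GPy

    X = np.random.rand(50, 1)
    Y = np.sin(6 * X) + 2.0 * X + 0.05 * np.random.randn(50, 1)

    kernel = GPy.kern.RBF(input_dim=1)
    likelihood = GPy.likelihoods.Gaussian()
    mean_fn = GPy.mappings.Linear(input_dim=1, output_dim=1)  # assumed Mapping subclass

    m = GPy.core.GP(X, Y, kernel=kernel, likelihood=likelihood, mean_function=mean_fn)
    m.optimize()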
@@ -82,14 +97,16 @@ class GP(Model):
 inference_method = exact_gaussian_inference.ExactGaussianInference()
 else:
 inference_method = expectation_propagation.EP()
-print "defaulting to ", inference_method, "for latent function inference"
+print("defaulting to ", inference_method, "for latent function inference")
 self.inference_method = inference_method

 logger.info("adding kernel and likelihood as parameters")
 self.link_parameter(self.kern)
 self.link_parameter(self.likelihood)
 self.posterior = None

-def set_XY(self, X=None, Y=None):
+def set_XY(self, X=None, Y=None, trigger_update=True):
 """
 Set the input / output data of the model
 This is useful if we wish to change our existing data but maintain the same model
@@ -99,7 +116,7 @@ class GP(Model):
 :param Y: output observations
 :type Y: np.ndarray
 """
-self.update_model(False)
+if trigger_update: self.update_model(False)
 if Y is not None:
 if self.normalizer is not None:
 self.normalizer.scale_by(Y)
@@ -123,26 +140,26 @@ class GP(Model):
 self.link_parameters(self.X)
 else:
 self.X = ObsAr(X)
-self.update_model(True)
-self._trigger_params_changed()
+if trigger_update: self.update_model(True)
+if trigger_update: self._trigger_params_changed()

-def set_X(self,X):
+def set_X(self,X, trigger_update=True):
 """
 Set the input data of the model

 :param X: input observations
 :type X: np.ndarray
 """
-self.set_XY(X=X)
+self.set_XY(X=X, trigger_update=trigger_update)

-def set_Y(self,Y):
+def set_Y(self,Y, trigger_update=True):
 """
 Set the output data of the model

 :param X: output observations
 :type X: np.ndarray
 """
-self.set_XY(Y=Y)
+self.set_XY(Y=Y, trigger_update=trigger_update)

 def parameters_changed(self):
 """
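A sketch of what the new trigger_update flag allows: deferring the re-inference while several pieces of data are swapped, then recomputing once. X_new and Y_new are assumed arrays of matching shapes, and m is a fitted model as in the earlier sketch.

    m.set_X(X_new, trigger_update=False)  # store new inputs, skip re-inference for now
    m.set_Y(Y_new)                        # trigger_update defaults to True: recompute once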
@@ -153,9 +170,11 @@ class GP(Model):
 This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
 this method yourself, there may be unexpected consequences.
 """
-self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
+self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.mean_function, self.Y_metadata)
 self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
 self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
+if self.mean_function is not None:
+    self.mean_function.update_gradients(self.grad_dict['dL_dm'], self.X)

 def log_likelihood(self):
 """
@@ -192,6 +211,10 @@ class GP(Model):

 #force mu to be a column vector
 if len(mu.shape)==1: mu = mu[:,None]

+#add the mean function in
+if not self.mean_function is None:
+    mu += self.mean_function.f(_Xnew)
 return mu, var

 def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
@@ -241,12 +264,14 @@ class GP(Model):

 def predictive_gradients(self, Xnew):
 """
-Compute the derivatives of the latent function with respect to X*
+Compute the derivatives of the predicted latent function with respect to X*

 Given a set of points at which to predict X* (size [N*,Q]), compute the
 derivatives of the mean and variance. Resulting arrays are sized:
 dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).

+Note that this is not the same as computing the mean and variance of the derivative of the function!
+
 dv_dX* -- [N*, Q], (since all outputs have the same variance)
 :param X: The points at which to get the predictive gradients
 :type X: np.ndarray (Xnew x self.input_dim)
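A shape-checking sketch for the docstring above, reusing the assumed model m from the earlier example (Q = 1 input dimension, D = 1 output):

    Xstar = np.linspace(0, 1, 5)[:, None]          # N* = 5, Q = 1
    dmu_dX, dv_dX = m.predictive_gradients(Xstar)
    print(dmu_dX.shape)  # expected (5, 1, 1), i.e. [N*, Q, D]
    print(dv_dX.shape)   # expected (5, 1),    i.e. [N*, Q]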
@@ -276,7 +301,7 @@ class GP(Model):
 :type size: int.
 :param full_cov: whether to return the full covariance matrix, or just the diagonal.
 :type full_cov: bool.
-:returns: Ysim: set of simulations
+:returns: fsim: set of simulations
 :rtype: np.ndarray (N x samples)
 """
 m, v = self._raw_predict(X, full_cov=full_cov)
@@ -284,11 +309,11 @@ class GP(Model):
 m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
 v = v.reshape(m.size,-1) if len(v.shape)==3 else v
 if not full_cov:
-Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
+fsim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
 else:
-Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
+fsim = np.random.multivariate_normal(m.flatten(), v, size).T

-return Ysim
+return fsim

 def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
 """
@@ -304,16 +329,16 @@ class GP(Model):
 :type noise_model: integer.
 :returns: Ysim: set of simulations, a Numpy array (N x samples).
 """
-Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
-Ysim = self.likelihood.samples(Ysim, Y_metadata)
+fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
+Ysim = self.likelihood.samples(fsim, Y_metadata)
 return Ysim

 def plot_f(self, plot_limits=None, which_data_rows='all',
 which_data_ycols='all', fixed_inputs=[],
 levels=20, samples=0, fignum=None, ax=None, resolution=None,
 plot_raw=True,
-linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
+linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx',
+apply_link=False):
 """
 Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
 This is a call to plot with plot_raw=True.
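A usage sketch for the two sampling methods, again with the assumed model m: posterior_samples_f draws the latent function, posterior_samples pushes those draws through the likelihood.

    Xstar = np.linspace(0, 1, 200)[:, None]
    f_samp = m.posterior_samples_f(Xstar, size=5)   # latent function draws
    y_samp = m.posterior_samples(Xstar, size=5)     # draws passed through the likelihood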
@@ -350,6 +375,8 @@ class GP(Model):
 :type Y_metadata: dict
 :param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
 :type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
+:param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*
+:type apply_link: boolean
 """
 assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
 from ..plotting.matplot_dep import models_plots
@@ -362,13 +389,13 @@ class GP(Model):
 which_data_ycols, fixed_inputs,
 levels, samples, fignum, ax, resolution,
 plot_raw=plot_raw, Y_metadata=Y_metadata,
-data_symbol=data_symbol, **kw)
+data_symbol=data_symbol, apply_link=apply_link, **kw)

 def plot(self, plot_limits=None, which_data_rows='all',
 which_data_ycols='all', fixed_inputs=[],
 levels=20, samples=0, fignum=None, ax=None, resolution=None,
 plot_raw=False,
-linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
+linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None):
 """
 Plot the posterior of the GP.
 - In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@@ -417,7 +444,7 @@ class GP(Model):
 which_data_ycols, fixed_inputs,
 levels, samples, fignum, ax, resolution,
 plot_raw=plot_raw, Y_metadata=Y_metadata,
-data_symbol=data_symbol, **kw)
+data_symbol=data_symbol, predict_kw=predict_kw, **kw)

 def input_sensitivity(self, summarize=True):
 """
@@ -441,7 +468,7 @@ class GP(Model):
 try:
 super(GP, self).optimize(optimizer, start, **kwargs)
 except KeyboardInterrupt:
-print "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
+print("KeyboardInterrupt caught, calling on_optimization_end() to round things up")
 self.inference_method.on_optimization_end()
 raise

@@ -458,3 +485,38 @@ class GP(Model):
 """
 from ..inference.latent_function_inference.inferenceX import infer_newX
 return infer_newX(self, Y_new, optimize=optimize)
+
+def log_predictive_density(self, x_test, y_test, Y_metadata=None):
+    """
+    Calculation of the log predictive density
+
+    .. math:
+        p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
+
+    :param x_test: test locations (x_{*})
+    :type x_test: (Nx1) array
+    :param y_test: test observations (y_{*})
+    :type y_test: (Nx1) array
+    :param Y_metadata: metadata associated with the test points
+    """
+    mu_star, var_star = self._raw_predict(x_test)
+    return self.likelihood.log_predictive_density(y_test, mu_star, var_star, Y_metadata=Y_metadata)
+
+def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_samples=1000):
+    """
+    Calculation of the log predictive density by sampling
+
+    .. math:
+        p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
+
+    :param x_test: test locations (x_{*})
+    :type x_test: (Nx1) array
+    :param y_test: test observations (y_{*})
+    :type y_test: (Nx1) array
+    :param Y_metadata: metadata associated with the test points
+    :param num_samples: number of samples to use in monte carlo integration
+    :type num_samples: int
+    """
+    mu_star, var_star = self._raw_predict(x_test)
+    return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
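A usage sketch for the two new methods, assuming the fitted model m from the earlier sketch and held-out arrays x_test, y_test of shape (N, 1):

    lpd = m.log_predictive_density(x_test, y_test)                 # exact, per test point
    lpd_mc = m.log_predictive_density_sampling(x_test, y_test,
                                               num_samples=2000)   # Monte Carlo estimate
    print(float(lpd.mean()), float(lpd_mc.mean()))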
@@ -1,13 +1,14 @@
 # Copyright (c) 2013,2014, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2015, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import sys
-from parameterization import Parameterized
+from .parameterization import Parameterized
 import numpy as np

 class Mapping(Parameterized):
 """
-Base model for shared behavior between models that can act like a mapping.
+Base model for shared mapping behaviours
 """

 def __init__(self, input_dim, output_dim, name='mapping'):
@@ -18,49 +19,12 @@ class Mapping(Parameterized):
 def f(self, X):
 raise NotImplementedError

-def df_dX(self, dL_df, X):
-    """Evaluate derivatives of mapping outputs with respect to inputs.
-
-    :param dL_df: gradient of the objective with respect to the function.
-    :type dL_df: ndarray (num_data x output_dim)
-    :param X: the input locations where derivatives are to be evaluated.
-    :type X: ndarray (num_data x input_dim)
-    :returns: matrix containing gradients of the function with respect to the inputs.
-    """
+def gradients_X(self, dL_dF, X):
 raise NotImplementedError

-def df_dtheta(self, dL_df, X):
-    """The gradient of the outputs of the mapping with respect to each of the parameters.
-
-    :param dL_df: gradient of the objective with respect to the function.
-    :type dL_df: ndarray (num_data x output_dim)
-    :param X: input locations where the function is evaluated.
-    :type X: ndarray (num_data x input_dim)
-    :returns: Matrix containing gradients with respect to parameters of each output for each input data.
-    :rtype: ndarray (num_params length)
-    """
-
+def update_gradients(self, dL_dF, X):
 raise NotImplementedError

-def plot(self, *args):
-    """
-    Plots the mapping associated with the model.
-      - In one dimension, the function is plotted.
-      - In two dimensions, a contour-plot shows the function
-      - In higher dimensions, we've not implemented this yet !TODO!
-
-    Can plot only part of the data and part of the posterior functions
-    using which_data and which_functions
-
-    This is a convenience function: arguments are passed to
-    GPy.plotting.matplot_dep.models_plots.plot_mapping
-    """
-
-    if "matplotlib" in sys.modules:
-        from ..plotting.matplot_dep import models_plots
-        mapping_plots.plot_mapping(self,*args)
-    else:
-        raise NameError, "matplotlib package has not been imported."
-
 class Bijective_mapping(Mapping):
 """
@@ -74,72 +38,4 @@ class Bijective_mapping(Mapping):
 """Inverse mapping from output domain of the function to the inputs."""
 raise NotImplementedError

-from model import Model
-
-class Mapping_check_model(Model):
-    """
-    This is a dummy model class used as a base class for checking that the
-    gradients of a given mapping are implemented correctly. It enables
-    checkgradient() to be called independently on each mapping.
-    """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        num_samples = 20
-        if mapping==None:
-            mapping = GPy.mapping.linear(1, 1)
-        if X==None:
-            X = np.random.randn(num_samples, mapping.input_dim)
-        if dL_df==None:
-            dL_df = np.ones((num_samples, mapping.output_dim))
-
-        self.mapping=mapping
-        self.X = X
-        self.dL_df = dL_df
-        self.num_params = self.mapping.num_params
-        Model.__init__(self)
-
-    def _get_params(self):
-        return self.mapping._get_params()
-
-    def _get_param_names(self):
-        return self.mapping._get_param_names()
-
-    def _set_params(self, x):
-        self.mapping._set_params(x)
-
-    def log_likelihood(self):
-        return (self.dL_df*self.mapping.f(self.X)).sum()
-
-    def _log_likelihood_gradients(self):
-        raise NotImplementedError, "This needs to be implemented to use the Mapping_check_model class."
-
-class Mapping_check_df_dtheta(Mapping_check_model):
-    """This class allows gradient checks for the gradient of a mapping with respect to parameters. """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X)
-
-    def _log_likelihood_gradients(self):
-        return self.mapping.df_dtheta(self.dL_df, self.X)
-
-class Mapping_check_df_dX(Mapping_check_model):
-    """This class allows gradient checks for the gradient of a mapping with respect to X. """
-    def __init__(self, mapping=None, dL_df=None, X=None):
-        Mapping_check_model.__init__(self,mapping=mapping,dL_df=dL_df, X=X)
-
-        if dL_df==None:
-            dL_df = np.ones((self.X.shape[0],self.mapping.output_dim))
-        self.num_params = self.X.shape[0]*self.mapping.input_dim
-
-    def _log_likelihood_gradients(self):
-        return self.mapping.df_dX(self.dL_df, self.X).flatten()
-
-    def _get_param_names(self):
-        return ['X_' +str(i) + ','+str(j) for j in range(self.X.shape[1]) for i in range(self.X.shape[0])]
-
-    def _get_params(self):
-        return self.X.flatten()
-
-    def _set_params(self, x):
-        self.X=x.reshape(self.X.shape)
@@ -5,7 +5,7 @@
 from .. import likelihoods
 from ..inference import optimization
 from ..util.misc import opt_wrapper
-from parameterization import Parameterized
+from .parameterization import Parameterized
 import multiprocessing as mp
 import numpy as np
 from numpy.linalg.linalg import LinAlgError
@@ -13,6 +13,7 @@ import itertools
 import sys
 from .verbose_optimization import VerboseOptimization
 # import numdifftools as ndt
+from functools import reduce

 class Model(Parameterized):
 _fail_count = 0 # Count of failed optimization steps (see objective)
@@ -30,7 +31,7 @@ class Model(Parameterized):
 self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)

 def log_likelihood(self):
-raise NotImplementedError, "this needs to be implemented to use the model class"
+raise NotImplementedError("this needs to be implemented to use the model class")
 def _log_likelihood_gradients(self):
 return self.gradient.copy()

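Two substitutions recur through the rest of this commit: the print statement becomes the print() function, and the comma form of raise becomes a constructor call; both old spellings are syntax errors on Python 3. A minimal sketch (the __future__ import is part of the sketch, not of this commit):

    from __future__ import print_function   # gives Python 2 the print() function

    print("message")                         # instead of: print "message"

    def todo():
        raise NotImplementedError("msg")     # instead of: raise NotImplementedError, "msg"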
@@ -82,7 +83,7 @@ class Model(Parameterized):
 pool.close() # signal that no more data coming in
 pool.join() # wait for all the tasks to complete
 except KeyboardInterrupt:
-print "Ctrl+c received, terminating and joining pool."
+print("Ctrl+c received, terminating and joining pool.")
 pool.terminate()
 pool.join()

@@ -95,10 +96,10 @@ class Model(Parameterized):
 self.optimization_runs.append(jobs[i].get())

 if verbose:
-print("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt))
+print(("Optimization restart {0}/{1}, f = {2}".format(i + 1, num_restarts, self.optimization_runs[-1].f_opt)))
 except Exception as e:
 if robust:
-print("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts))
+print(("Warning - optimization restart {0}/{1} failed".format(i + 1, num_restarts)))
 else:
 raise e

@@ -119,7 +120,7 @@ class Model(Parameterized):

 DEPRECATED.
 """
-raise DeprecationWarning, 'parameters now have default constraints'
+raise DeprecationWarning('parameters now have default constraints')

 def objective_function(self):
 """
@@ -213,14 +214,14 @@ class Model(Parameterized):
 self.obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e10, 1e10)
 return obj_f, self.obj_grads

-def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, **kwargs):
+def optimize(self, optimizer=None, start=None, messages=False, max_iters=1000, ipython_notebook=True, clear_after_finish=False, **kwargs):
 """
 Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.

 kwargs are passed to the optimizer. They can be:

 :param max_f_eval: maximum number of function evaluations
 :type max_f_eval: int
 :param max_iters: maximum number of function evaluations
 :type max_iters: int
 :messages: True: Display messages during optimisation, "ipython_notebook":
 :type messages: bool"string
 :param optimizer: which optimizer to use (defaults to self.preferred optimizer)
@@ -237,10 +238,10 @@ class Model(Parameterized):

 """
 if self.is_fixed or self.size == 0:
-print 'nothing to optimize'
+print('nothing to optimize')

 if not self.update_model():
-print "updates were off, setting updates on again"
+print("updates were off, setting updates on again")
 self.update_model(True)

 if start == None:
@@ -305,7 +306,7 @@ class Model(Parameterized):
 transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]

 if transformed_index.size == 0:
-print "No free parameters to check"
+print("No free parameters to check")
 return

 # just check the global ratio
@@ -340,9 +341,9 @@ class Model(Parameterized):
 cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
 cols = np.array(cols) + 5
 header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
-header_string = map(lambda x: '|'.join(x), [header_string])
+header_string = list(map(lambda x: '|'.join(x), [header_string]))
 separator = '-' * len(header_string[0])
-print '\n'.join([header_string[0], separator])
+print('\n'.join([header_string[0], separator]))
 if target_param is None:
 param_index = range(len(x))
 transformed_index = param_index
@@ -358,19 +359,24 @@ class Model(Parameterized):
 transformed_index = param_index

 if param_index.size == 0:
-print "No free parameters to check"
+print("No free parameters to check")
 return

 gradient = self._grads(x).copy()
 np.where(gradient == 0, 1e-312, gradient)
 ret = True
-for nind, xind in itertools.izip(param_index, transformed_index):
+for nind, xind in zip(param_index, transformed_index):
 xx = x.copy()
 xx[xind] += step
 f1 = self._objective(xx)
 xx[xind] -= 2.*step
 f2 = self._objective(xx)
+#Avoid divide by zero, if any of the values are above 1e-15, otherwise both values are essentiall
+#the same
+if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
 df_ratio = np.abs((f1 - f2) / min(f1, f2))
+else:
+    df_ratio = 1.0
 df_unstable = df_ratio < df_tolerance
 numerical_gradient = (f1 - f2) / (2 * step)
 if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
@@ -392,7 +398,7 @@ class Model(Parameterized):
 ng = '%.6f' % float(numerical_gradient)
 df = '%1.e' % float(df_ratio)
 grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}|{5:^{c5}}".format(formatted_name, r, d, g, ng, df, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4], c5=cols[5])
-print grad_string
+print(grad_string)

 self.optimizer_array = x
 return ret
@@ -402,6 +408,7 @@ class Model(Parameterized):
 model_details = [['<b>Model</b>', self.name + '<br>'],
 ['<b>Log-likelihood</b>', '{}<br>'.format(float(self.log_likelihood()))],
 ["<b>Number of Parameters</b>", '{}<br>'.format(self.size)],
+["<b>Number of Optimization Parameters</b>", '{}<br>'.format(self._size_transformed())],
 ["<b>Updates</b>", '{}<br>'.format(self._update_on)],
 ]
 from operator import itemgetter
@@ -419,6 +426,7 @@ class Model(Parameterized):
 model_details = [['Name', self.name],
 ['Log-likelihood', '{}'.format(float(self.log_likelihood()))],
 ["Number of Parameters", '{}'.format(self.size)],
+["Number of Optimization Parameters", '{}'.format(self._size_transformed())],
 ["Updates", '{}'.format(self._update_on)],
 ]
 from operator import itemgetter
@@ -1,5 +1,5 @@
 # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from param import Param, ObsAr
-from parameterized import Parameterized
+from .param import Param, ObsAr
+from .parameterized import Parameterized
@@ -3,7 +3,9 @@

 import numpy
 from numpy.lib.function_base import vectorize
-from lists_and_dicts import IntArrayDict
+from .lists_and_dicts import IntArrayDict
+from functools import reduce
 from transformations import Transformation

 def extract_properties_to_index(index, props):
 prop_index = dict()
@@ -62,11 +64,14 @@ class ParameterIndexOperations(object):
 def __init__(self, constraints=None):
 self._properties = IntArrayDict()
 if constraints is not None:
-for t, i in constraints.iteritems():
+#python 3 fix
+#for t, i in constraints.iteritems():
+for t, i in constraints.items():
 self.add(t, i)

-def iteritems(self):
-    return self._properties.iteritems()
+#iteritems has gone in python 3
+#def iteritems(self):
+#    return self._properties.iteritems()

 def items(self):
 return self._properties.items()
@@ -75,7 +80,7 @@ class ParameterIndexOperations(object):
 return self._properties.keys()

 def iterproperties(self):
-return self._properties.iterkeys()
+return iter(self._properties)

 def shift_right(self, start, size):
 for ind in self.iterindices():
@@ -83,7 +88,7 @@ class ParameterIndexOperations(object):
 ind[toshift] += size

 def shift_left(self, start, size):
-for v, ind in self.items():
+for v, ind in list(self.items()):
 todelete = (ind>=start) * (ind<start+size)
 if todelete.size != 0:
 ind = ind[~todelete]
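The list(...) wrapper above matters on Python 3, where dict.items() returns a live view of the dictionary; copying it first lets the loop mutate the dict safely. A small sketch of why:

    d = {'a': 1, 'b': 2, 'c': 3}
    for k, v in list(d.items()):   # iterate over a snapshot of the items
        if v > 1:
            del d[k]               # deleting while iterating the live view would raise RuntimeError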
@@ -101,7 +106,11 @@ class ParameterIndexOperations(object):
 return reduce(lambda a,b: a+b.size, self.iterindices(), 0)

 def iterindices(self):
+try:
 return self._properties.itervalues()
+except AttributeError:
+#Changed this from itervalues to values for Py3 compatibility. It didn't break the test suite.
+return self._properties.values()

 def indices(self):
 return self._properties.values()
@@ -150,14 +159,18 @@ class ParameterIndexOperations(object):
 return numpy.array([]).astype(int)

 def update(self, parameter_index_view, offset=0):
-for i, v in parameter_index_view.iteritems():
+#py3 fix
+#for i, v in parameter_index_view.iteritems():
+for i, v in parameter_index_view.items():
 self.add(i, v+offset)

 def copy(self):
 return self.__deepcopy__(None)

 def __deepcopy__(self, memo):
-return ParameterIndexOperations(dict(self.iteritems()))
+#py3 fix
+#return ParameterIndexOperations(dict(self.iteritems()))
+return ParameterIndexOperations(dict(self.items()))

 def __getitem__(self, prop):
 return self._properties[prop]
@@ -195,22 +208,26 @@ class ParameterIndexOperationsView(object):
 def _filter_index(self, ind):
 return ind[(ind >= self._offset) * (ind < (self._offset + self._size))] - self._offset


-def iteritems(self):
-    for i, ind in self._param_index_ops.iteritems():
+#iteritems has gone in python 3. It has been renamed items()
+def items(self):
+    _items_list = list(self._param_index_ops.items())
+    for i, ind in _items_list:
 ind2 = self._filter_index(ind)
 if ind2.size > 0:
 yield i, ind2

-def items(self):
-    return [[i,v] for i,v in self.iteritems()]
+#Python 3 items() is now implemented as per py2 iteritems
+#def items(self):
+#    return [[i,v] for i,v in self.iteritems()]

 def properties(self):
 return [i for i in self.iterproperties()]


 def iterproperties(self):
-for i, _ in self.iteritems():
+#py3 fix
+#for i, _ in self.iteritems():
+for i, _ in self.items():
 yield i

@@ -230,7 +247,9 @@ class ParameterIndexOperationsView(object):


 def iterindices(self):
-for _, ind in self.iteritems():
+#py3 fix
+#for _, ind in self.iteritems():
+for _, ind in self.items():
 yield ind

@@ -286,10 +305,14 @@ class ParameterIndexOperationsView(object):

 def __str__(self, *args, **kwargs):
 import pprint
-return pprint.pformat(dict(self.iteritems()))
+#py3 fixes
+#return pprint.pformat(dict(self.iteritems()))
+return pprint.pformat(dict(self.items()))

 def update(self, parameter_index_view, offset=0):
-for i, v in parameter_index_view.iteritems():
+#py3 fixes
+#for i, v in parameter_index_view.iteritems():
+for i, v in parameter_index_view.items():
 self.add(i, v+offset)

@@ -297,6 +320,8 @@ class ParameterIndexOperationsView(object):
 return self.__deepcopy__(None)

 def __deepcopy__(self, memo):
-return ParameterIndexOperations(dict(self.iteritems()))
+#py3 fix
+#return ParameterIndexOperations(dict(self.iteritems()))
+return ParameterIndexOperations(dict(self.items()))
 pass

@@ -32,7 +32,7 @@ class ArrayList(list):
 if el is item:
 return index
 index += 1
-raise ValueError, "{} is not in list".format(item)
+raise ValueError("{} is not in list".format(item))
 pass

 class ObserverList(object):
@@ -75,7 +75,7 @@ class ObserverList(object):

 def __str__(self):
 from . import ObsAr, Param
-from parameter_core import Parameterizable
+from .parameter_core import Parameterizable
 ret = []
 curr_p = None

@@ -12,7 +12,7 @@ class Observable(object):
 """
 def __init__(self, *args, **kwargs):
 super(Observable, self).__init__()
-from lists_and_dicts import ObserverList
+from .lists_and_dicts import ObserverList
 self.observers = ObserverList()
 self._update_on = True

@@ -3,8 +3,8 @@


 import numpy as np
-from parameter_core import Pickleable
-from observable import Observable
+from .parameter_core import Pickleable
+from .observable import Observable

 class ObsAr(np.ndarray, Pickleable, Observable):
 """
@@ -39,7 +39,7 @@ class ObsAr(np.ndarray, Pickleable, Observable):
 return self.view(np.ndarray)

 def copy(self):
-from lists_and_dicts import ObserverList
+from .lists_and_dicts import ObserverList
 memo = {}
 memo[id(self)] = self
 memo[id(self.observers)] = ObserverList()

@@ -4,8 +4,9 @@
 import itertools
 import numpy
 np = numpy
-from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
-from observable_array import ObsAr
+from .parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
+from .observable_array import ObsAr
+from functools import reduce

 ###### printing
 __constraints_name__ = "Constraint"
@@ -156,7 +157,7 @@ class Param(Parameterizable, ObsAr):
 #===========================================================================
 @property
 def is_fixed(self):
-from transformations import __fixed__
+from .transformations import __fixed__
 return self.constraints[__fixed__].size == self.size

 def _get_original(self, param):
@@ -207,10 +208,14 @@ class Param(Parameterizable, ObsAr):
 return 0
 @property
 def _constraints_str(self):
-return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
+#py3 fix
+#return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
+return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.items()))]
 @property
 def _priors_str(self):
-return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))]
+#py3 fix
+#return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))]
+return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.items()))]
 @property
 def _ties_str(self):
 return ['']
@@ -279,7 +284,7 @@ class Param(Parameterizable, ObsAr):
 .tg th{font-family:"Courier New", Courier, monospace !important;font-weight:normal;color:#fff;background-color:#26ADE4;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:#DCDCDC;}
 .tg .tg-left{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:left;}
 .tg .tg-right{font-family:"Courier New", Courier, monospace !important;font-weight:normal;text-align:right;}
-</style>"""] + ['<table class="tg">'] + [header] + ["<tr><td class=tg-left>{i}</td><td class=tg-right>{x}</td><td class=tg-left>{c}</td><td class=tg-left>{p}</td><td class=tg-left>{t}</td></tr>".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)] + ["</table>"])
+</style>"""] + ['<table class="tg">'] + [header] + ["<tr><td class=tg-left>{i}</td><td class=tg-right>{x}</td><td class=tg-left>{c}</td><td class=tg-left>{p}</td><td class=tg-left>{t}</td></tr>".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in zip(indices, vals, constr_matrix, ties, prirs)] + ["</table>"])

 def __str__(self, constr_matrix=None, indices=None, prirs=None, ties=None, lc=None, lx=None, li=None, lp=None, lt=None, only_name=False):
 filter_ = self._current_slice_
@@ -300,7 +305,7 @@ class Param(Parameterizable, ObsAr):
 if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
 else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
 if not ties: ties = itertools.cycle([''])
-return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
+return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in zip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
 # except: return super(Param, self).__str__()

 class ParamConcatenation(object):
@@ -313,7 +318,7 @@ class ParamConcatenation(object):
 See :py:class:`GPy.core.parameter.Param` for more details on constraining.
 """
 # self.params = params
-from lists_and_dicts import ArrayList
+from .lists_and_dicts import ArrayList
 self.params = ArrayList([])
 for p in params:
 for p in p.flattened_parameters:
@@ -336,7 +341,9 @@ class ParamConcatenation(object):
 level += 1
 parent = parent._parent_
 import operator
-self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1)))
+#py3 fix
+#self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1)))
+self.parents = map(lambda x: x[0], sorted(parents.items(), key=operator.itemgetter(1)))
 #===========================================================================
 # Get/set items, enable broadcasting
 #===========================================================================
@@ -429,14 +436,14 @@ class ParamConcatenation(object):
 params = self.params
 constr_matrices, ties_matrices, prior_matrices = zip(*map(f, params))
 indices = [p._indices() for p in params]
-lc = max([p._max_len_names(cm, __constraints_name__) for p, cm in itertools.izip(params, constr_matrices)])
+lc = max([p._max_len_names(cm, __constraints_name__) for p, cm in zip(params, constr_matrices)])
 lx = max([p._max_len_values() for p in params])
-li = max([p._max_len_index(i) for p, i in itertools.izip(params, indices)])
-lt = max([p._max_len_names(tm, __tie_name__) for p, tm in itertools.izip(params, ties_matrices)])
-lp = max([p._max_len_names(pm, __constraints_name__) for p, pm in itertools.izip(params, prior_matrices)])
+li = max([p._max_len_index(i) for p, i in zip(params, indices)])
+lt = max([p._max_len_names(tm, __tie_name__) for p, tm in zip(params, ties_matrices)])
+lp = max([p._max_len_names(pm, __constraints_name__) for p, pm in zip(params, prior_matrices)])
 strings = []
 start = True
-for p, cm, i, tm, pm in itertools.izip(params,constr_matrices,indices,ties_matrices,prior_matrices):
+for p, cm, i, tm, pm in zip(params,constr_matrices,indices,ties_matrices,prior_matrices):
 strings.append(p.__str__(constr_matrix=cm, indices=i, prirs=pm, ties=tm, lc=lc, lx=lx, li=li, lp=lp, lt=lt, only_name=(1-start)))
 start = False
 return "\n".join(strings)
@@ -13,11 +13,12 @@ Observable Pattern for patameterization

 """

-from transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
+from .transformations import Transformation,Logexp, NegativeLogexp, Logistic, __fixed__, FIXED, UNFIXED
 import numpy as np
 import re
 import logging
-from updateable import Updateable
+from .updateable import Updateable
+from functools import reduce

 class HierarchyError(Exception):
 """
@@ -36,7 +37,7 @@ def adjust_name_for_printing(name):
 name = name.replace("/", "_l_").replace("@", '_at_')
 name = name.replace("(", "_of_").replace(")", "")
 if re.match(r'^[a-zA-Z_][a-zA-Z0-9-_]*$', name) is None:
-raise NameError, "name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name)
+raise NameError("name {} converted to {} cannot be further converted to valid python variable name!".format(name2, name))
 return name
 return ''

@@ -65,13 +66,13 @@ class Parentable(object):
 Gets called, when the parent changed, so we can adjust our
 inner attributes according to the new parent.
 """
-raise NotImplementedError, "shouldnt happen, Parentable objects need to be able to change their parent"
+raise NotImplementedError("shouldnt happen, Parentable objects need to be able to change their parent")

 def _disconnect_parent(self, *args, **kw):
 """
 Disconnect this object from its parent
 """
-raise NotImplementedError, "Abstract superclass"
+raise NotImplementedError("Abstract superclass")

 @property
 def _highest_parent_(self):
@@ -109,7 +110,10 @@ class Pickleable(object):
 it properly.
 :param protocol: pickling protocol to use, python-pickle for details.
 """
+try: #Py2
 import cPickle as pickle
+except ImportError: #Py3
+    import pickle
 if isinstance(f, str):
 with open(f, 'wb') as f:
 pickle.dump(self, f, protocol)
@@ -138,9 +142,9 @@ class Pickleable(object):
 which = self
 which.traverse_parents(parents.append) # collect parents
 for p in parents:
-if not memo.has_key(id(p)):memo[id(p)] = None # set all parents to be None, so they will not be copied
-if not memo.has_key(id(self.gradient)):memo[id(self.gradient)] = None # reset the gradient
-if not memo.has_key(id(self._fixes_)):memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
+if not id(p) in memo :memo[id(p)] = None # set all parents to be None, so they will not be copied
+if not id(self.gradient) in memo:memo[id(self.gradient)] = None # reset the gradient
+if not id(self._fixes_) in memo :memo[id(self._fixes_)] = None # fixes have to be reset, as this is now highest parent
 copy = copy.deepcopy(self, memo) # and start the copy
 copy._parent_index_ = None
 copy._trigger_params_changed()
@@ -163,14 +167,16 @@ class Pickleable(object):
 '_Cacher_wrap__cachers', # never pickle cachers
 ]
 dc = dict()
-for k,v in self.__dict__.iteritems():
+#py3 fix
+#for k,v in self.__dict__.iteritems():
+for k,v in self.__dict__.items():
 if k not in ignore_list:
 dc[k] = v
 return dc

 def __setstate__(self, state):
 self.__dict__.update(state)
-from lists_and_dicts import ObserverList
+from .lists_and_dicts import ObserverList
 self.observers = ObserverList()
 self._setup_observers()
 self._optimizer_copy_transformed = False
@@ -214,7 +220,7 @@ class Gradcheckable(Pickleable, Parentable):
 Perform the checkgrad on the model.
 TODO: this can be done more efficiently, when doing it inside here
 """
-raise HierarchyError, "This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!"
+raise HierarchyError("This parameter is not in a model with a likelihood, and, therefore, cannot be gradient checked!")

 class Nameable(Gradcheckable):
 """
@@ -268,7 +274,7 @@ class Indexable(Nameable, Updateable):
 def __init__(self, name, default_constraint=None, *a, **kw):
 super(Indexable, self).__init__(name=name, *a, **kw)
 self._default_constraint_ = default_constraint
-from index_operations import ParameterIndexOperations
+from .index_operations import ParameterIndexOperations
 self.constraints = ParameterIndexOperations()
 self.priors = ParameterIndexOperations()
 if self._default_constraint_ is not None:
@@ -310,7 +316,7 @@ class Indexable(Nameable, Updateable):
 that is an int array, containing the indexes for the flattened
 param inside this parameterized logic.
 """
-from param import ParamConcatenation
+from .param import ParamConcatenation
 if isinstance(param, ParamConcatenation):
 return np.hstack((self._raveled_index_for(p) for p in param.params))
 return param._raveled_index() + self._offset_for(param)
@@ -407,7 +413,7 @@ class Indexable(Nameable, Updateable):
 repriorized = self.unset_priors()
 self._add_to_index_operations(self.priors, repriorized, prior, warning)

-from domains import _REAL, _POSITIVE, _NEGATIVE
+from .domains import _REAL, _POSITIVE, _NEGATIVE
 if prior.domain is _POSITIVE:
 self.constrain_positive(warning)
 elif prior.domain is _NEGATIVE:
@@ -426,7 +432,9 @@ class Indexable(Nameable, Updateable):
 """evaluate the prior"""
 if self.priors.size > 0:
 x = self.param_array
-return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
+#py3 fix
+#return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
+return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
 return 0.

 def _log_prior_gradients(self):
@@ -434,7 +442,9 @@ class Indexable(Nameable, Updateable):
 if self.priors.size > 0:
 x = self.param_array
 ret = np.zeros(x.size)
-[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
+#py3 fix
+#[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
+[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
 return ret
 return 0.

@@ -536,7 +546,7 @@ class Indexable(Nameable, Updateable):
 update the constraints and priors view, so that
 constraining is automized for the parent.
 """
-from index_operations import ParameterIndexOperationsView
+from .index_operations import ParameterIndexOperationsView
 #if getattr(self, "_in_init_"):
 #import ipdb;ipdb.set_trace()
 #self.constraints.update(param.constraints, start)
@@ -558,7 +568,7 @@ class Indexable(Nameable, Updateable):
 """
 if warning and reconstrained.size > 0:
 # TODO: figure out which parameters have changed and only print those
-print "WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name)
+print("WARNING: reconstraining parameters {}".format(self.hierarchy_name() or self.name))
 index = self._raveled_index()
 which.add(what, index)
 return index
@@ -571,7 +581,7 @@ class Indexable(Nameable, Updateable):
 if len(transforms) == 0:
 transforms = which.properties()
 removed = np.empty((0,), dtype=int)
-for t in transforms:
+for t in list(transforms):
 unconstrained = which.remove(t, self._raveled_index())
 removed = np.union1d(removed, unconstrained)
 if t is __fixed__:
@@ -612,7 +622,9 @@ class OptimizationHandlable(Indexable):

 if not self._optimizer_copy_transformed:
 self._optimizer_copy_.flat = self.param_array.flat
-[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+#py3 fix
+#[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.iteritems() if c != __fixed__]
+[np.put(self._optimizer_copy_, ind, c.finv(self.param_array[ind])) for c, ind in self.constraints.items() if c != __fixed__]
 if self.has_parent() and (self.constraints[__fixed__].size != 0 or self._has_ties()):
 fixes = np.ones(self.size).astype(bool)
 fixes[self.constraints[__fixed__]] = FIXED
@@ -641,21 +653,25 @@ class OptimizationHandlable(Indexable):
 if f is None:
 self.param_array.flat = p
 [np.put(self.param_array, ind, c.f(self.param_array.flat[ind]))
-for c, ind in self.constraints.iteritems() if c != __fixed__]
+#py3 fix
+#for c, ind in self.constraints.iteritems() if c != __fixed__]
+for c, ind in self.constraints.items() if c != __fixed__]
 else:
 self.param_array.flat[f] = p
 [np.put(self.param_array, ind[f[ind]], c.f(self.param_array.flat[ind[f[ind]]]))
-for c, ind in self.constraints.iteritems() if c != __fixed__]
+#py3 fix
+#for c, ind in self.constraints.iteritems() if c != __fixed__]
+for c, ind in self.constraints.items() if c != __fixed__]
 #self._highest_parent_.tie.propagate_val()

 self._optimizer_copy_transformed = False
 self.trigger_update()

 def _get_params_transformed(self):
-raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
+raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!")
 #
 def _set_params_transformed(self, p):
-raise DeprecationWarning, "_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!"
+raise DeprecationWarning("_get|set_params{_optimizer_copy_transformed} is deprecated, use self.optimizer array insetad!")

 def _trigger_params_changed(self, trigger_parent=True):
 """
@@ -680,7 +696,9 @@ class OptimizationHandlable(Indexable):
 constraint to it.
 """
 self._highest_parent_.tie.collate_gradient()
-[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
+#py3 fix
+#[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
+[np.put(g, i, c.gradfactor(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__]
 if self._has_fixes(): return g[self._fixes_]
 return g

@@ -690,7 +708,9 @@ class OptimizationHandlable(Indexable):
 constraint to it.
 """
 self._highest_parent_.tie.collate_gradient()
-[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
+#py3 fix
+#[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.iteritems() if c != __fixed__]
+[np.put(g, i, c.gradfactor_non_natural(self.param_array[i], g[i])) for c, i in self.constraints.items() if c != __fixed__]
 if self._has_fixes(): return g[self._fixes_]
 return g

@@ -701,7 +721,7 @@ class OptimizationHandlable(Indexable):
 Return the number of parameters of this parameter_handle.
 Param objects will always return 0.
 """
-raise NotImplemented, "Abstract, please implement in respective classes"
+raise NotImplemented("Abstract, please implement in respective classes")

 def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
 """
@@ -750,7 +770,9 @@ class OptimizationHandlable(Indexable):
 self.optimizer_array = x # makes sure all of the tied parameters get the same init (since there's only one prior object...)
 # now draw from prior where possible
 x = self.param_array.copy()
-[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
+#Py3 fix
+#[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.iteritems() if not p is None]
+[np.put(x, ind, p.rvs(ind.size)) for p, ind in self.priors.items() if not p is None]
 unfixlist = np.ones((self.size,),dtype=np.bool)
 unfixlist[self.constraints[__fixed__]] = False
 self.param_array.flat[unfixlist] = x.view(np.ndarray).ravel()[unfixlist]
@@ -947,7 +969,7 @@ class Parameterizable(OptimizationHandlable):
 self._add_parameter_name(param, ignore_added_names)
 # and makes sure to not delete programmatically added parameters
 for other in self.parameters[::-1]:
-if other is not param and other.name.startswith(param.name):
+if other is not param and other.name == param.name:
 warn_and_retry(param, _name_digit.match(other.name))
 return
 if pname not in dir(self):
@@ -1,15 +1,15 @@
 # Copyright (c) 2014, Max Zwiessele, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


+import six # For metaclass support in Python 2 and 3 simultaneously
 import numpy; np = numpy
 import itertools
 from re import compile, _pattern_type
-from param import ParamConcatenation
+from .param import ParamConcatenation
 from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing

 import logging
 from GPy.core.parameterization.index_operations import ParameterIndexOperationsView
 from index_operations import ParameterIndexOperationsView
 logger = logging.getLogger("parameters changed meta")

 class ParametersChangedMeta(type):
@@ -27,6 +27,7 @@ class ParametersChangedMeta(type):
 self.parameters_changed()
 return self

+@six.add_metaclass(ParametersChangedMeta)
 class Parameterized(Parameterizable):
 """
 Parameterized class
@ -73,7 +74,9 @@ class Parameterized(Parameterizable):
|
|||
# Metaclass for parameters changed after init.
|
||||
# This makes sure, that parameters changed will always be called after __init__
|
||||
# **Never** call parameters_changed() yourself
|
||||
__metaclass__ = ParametersChangedMeta
|
||||
#This is ignored in Python 3 -- you need to put the meta class in the function definition.
|
||||
#__metaclass__ = ParametersChangedMeta
|
||||
#The six module is used to support both Python 2 and 3 simultaneously
|
||||
#===========================================================================
|
||||
def __init__(self, name=None, parameters=[], *a, **kw):
|
||||
super(Parameterized, self).__init__(name=name, *a, **kw)
|
||||
|
|
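The class-level `__metaclass__` attribute is silently ignored on Python 3, which is why the diff switches to `six.add_metaclass`. A minimal, self-contained sketch of that pattern (a toy metaclass and class, not GPy's actual ParametersChangedMeta) is:

    import six

    class NotifyMeta(type):
        """Toy metaclass: call a hook right after __init__ finishes."""
        def __call__(cls, *args, **kwargs):
            obj = super(NotifyMeta, cls).__call__(*args, **kwargs)
            obj.parameters_changed()   # guaranteed to run once construction is done
            return obj

    @six.add_metaclass(NotifyMeta)     # works on both Python 2 and Python 3
    class Parameterish(object):
        def __init__(self):
            self.updates = 0
        def parameters_changed(self):
            self.updates += 1

    assert Parameterish().updates == 1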
@ -131,7 +134,7 @@ class Parameterized(Parameterizable):
|
|||
if param.has_parent():
|
||||
def visit(parent, self):
|
||||
if parent is self:
|
||||
raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
|
||||
raise HierarchyError("You cannot add a parameter twice into the hierarchy")
|
||||
param.traverse_parents(visit, self)
|
||||
param._parent_.unlink_parameter(param)
|
||||
# make sure the size is set
|
||||
|
|
@ -173,7 +176,7 @@ class Parameterized(Parameterizable):
|
|||
self._highest_parent_._connect_fixes()
|
||||
|
||||
else:
|
||||
raise HierarchyError, """Parameter exists already, try making a copy"""
|
||||
raise HierarchyError("""Parameter exists already, try making a copy""")
|
||||
|
||||
|
||||
def link_parameters(self, *parameters):
|
||||
|
|
@ -189,9 +192,9 @@ class Parameterized(Parameterizable):
|
|||
"""
|
||||
if not param in self.parameters:
|
||||
try:
|
||||
raise RuntimeError, "{} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
|
||||
raise RuntimeError("{} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name))
|
||||
except AttributeError:
|
||||
raise RuntimeError, "{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param))
|
||||
raise RuntimeError("{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param)))
|
||||
|
||||
start = sum([p.size for p in self.parameters[:param._parent_index_]])
|
||||
self._remove_parameter_name(param)
|
||||
|
|
@ -215,9 +218,9 @@ class Parameterized(Parameterizable):
|
|||
self._highest_parent_._notify_parent_change()
|
||||
|
||||
def add_parameter(self, *args, **kwargs):
|
||||
raise DeprecationWarning, "add_parameter was renamed to link_parameter to avoid confusion of setting variables, use link_parameter instead"
|
||||
raise DeprecationWarning("add_parameter was renamed to link_parameter to avoid confusion of setting variables, use link_parameter instead")
|
||||
def remove_parameter(self, *args, **kwargs):
|
||||
raise DeprecationWarning, "remove_parameter was renamed to unlink_parameter to avoid confusion of setting variables, use unlink_parameter instead"
|
||||
raise DeprecationWarning("remove_parameter was renamed to unlink_parameter to avoid confusion of setting variables, use unlink_parameter instead")
|
||||
|
||||
def _connect_parameters(self, ignore_added_names=False):
|
||||
# connect parameterlist to this parameterized object
|
||||
|
|
@ -237,7 +240,7 @@ class Parameterized(Parameterizable):
|
|||
self._param_slices_ = []
|
||||
for i, p in enumerate(self.parameters):
|
||||
if not p.param_array.flags['C_CONTIGUOUS']:
|
||||
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
|
||||
raise ValueError("This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS")
|
||||
|
||||
p._parent_ = self
|
||||
p._parent_index_ = i
|
||||
|
|
@ -268,7 +271,7 @@ class Parameterized(Parameterizable):
|
|||
"""
|
||||
if not isinstance(regexp, _pattern_type): regexp = compile(regexp)
|
||||
found_params = []
|
||||
for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters):
|
||||
for n, p in zip(self.parameter_names(False, False, True), self.flattened_parameters):
|
||||
if regexp.match(n) is not None:
|
||||
found_params.append(p)
|
||||
return found_params
|
||||
|
|
@ -279,7 +282,7 @@ class Parameterized(Parameterizable):
|
|||
else:
|
||||
if paramlist is None:
|
||||
paramlist = self.grep_param_names(name)
|
||||
if len(paramlist) < 1: raise AttributeError, name
|
||||
if len(paramlist) < 1: raise AttributeError(name)
|
||||
if len(paramlist) == 1:
|
||||
if isinstance(paramlist[-1], Parameterized):
|
||||
paramlist = paramlist[-1].flattened_parameters
|
||||
|
|
@ -295,7 +298,7 @@ class Parameterized(Parameterizable):
|
|||
try:
|
||||
self.param_array[name] = value
|
||||
except:
|
||||
raise ValueError, "Setting by slice or index only allowed with array-like"
|
||||
raise ValueError("Setting by slice or index only allowed with array-like")
|
||||
self.trigger_update()
|
||||
else:
|
||||
try: param = self.__getitem__(name, paramlist)
|
||||
|
|
@ -325,7 +328,7 @@ class Parameterized(Parameterizable):
|
|||
self._notify_parent_change()
|
||||
self.parameters_changed()
|
||||
except Exception as e:
|
||||
print "WARNING: caught exception {!s}, trying to continue".format(e)
|
||||
print("WARNING: caught exception {!s}, trying to continue".format(e))
|
||||
|
||||
def copy(self, memo=None):
|
||||
if memo is None:
|
||||
|
|
@ -379,7 +382,7 @@ class Parameterized(Parameterizable):
|
|||
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
|
||||
format_spec = "<tr><td class=tg-left>{{name:<{0}s}}</td><td class=tg-right>{{desc:>{1}s}}</td><td class=tg-left>{{const:^{2}s}}</td><td class=tg-left>{{pri:^{3}s}}</td><td class=tg-left>{{t:^{4}s}}</td></tr>".format(nl, sl, cl, pl, tl)
|
||||
to_print = []
|
||||
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
|
||||
for n, d, c, t, p in zip(names, desc, constrs, ts, prirs):
|
||||
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
|
||||
sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
|
||||
if header:
|
||||
|
|
@ -414,7 +417,7 @@ class Parameterized(Parameterizable):
|
|||
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
|
||||
format_spec = " \033[1m{{name:<{0}s}}\033[0;0m | {{desc:>{1}s}} | {{const:^{2}s}} | {{pri:^{3}s}} | {{t:^{4}s}}".format(nl, sl, cl, pl, tl)
|
||||
to_print = []
|
||||
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
|
||||
for n, d, c, t, p in zip(names, desc, constrs, ts, prirs):
|
||||
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
|
||||
sep = '-' * (nl + sl + cl + + pl + tl + 8 * 2 + 3)
|
||||
if header:
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
import numpy as np
from scipy.special import gammaln, digamma
from ...util.linalg import pdinv
from domains import _REAL, _POSITIVE
from .domains import _REAL, _POSITIVE
import warnings
import weakref

@ -15,7 +15,11 @@ class Prior(object):
_instance = None
def __new__(cls, *args, **kwargs):
if not cls._instance or cls._instance.__class__ is not cls:
cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs)
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
cls._instance = newfunc(cls)
else:
cls._instance = newfunc(cls, *args, **kwargs)
return cls._instance

def pdf(self, x):
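In Python 3, `object.__new__` raises a TypeError when it is handed extra constructor arguments, which is what the guard above works around. A minimal sketch of the same singleton-style pattern outside GPy (a hypothetical ToyPrior class) is:

    class ToyPrior(object):
        """Toy singleton-per-class prior illustrating the __new__ guard."""
        _instance = None

        def __new__(cls, *args, **kwargs):
            if not cls._instance or cls._instance.__class__ is not cls:
                newfunc = super(ToyPrior, cls).__new__
                if newfunc is object.__new__:
                    # object.__new__ accepts no extra args on Python 3
                    cls._instance = newfunc(cls)
                else:
                    cls._instance = newfunc(cls, *args, **kwargs)
            return cls._instance

        def __init__(self, scale=1.0):
            self.scale = scale

    assert ToyPrior(2.0) is ToyPrior(3.0)   # the same object is reused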
@ -52,7 +56,11 @@ class Gaussian(Prior):
|
|||
for instance in cls._instances:
|
||||
if instance().mu == mu and instance().sigma == sigma:
|
||||
return instance()
|
||||
o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||
newfunc = super(Prior, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
o = newfunc(cls)
|
||||
else:
|
||||
o = newfunc(cls, mu, sigma)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
|
||||
|
|
@ -140,7 +148,11 @@ class LogGaussian(Gaussian):
|
|||
for instance in cls._instances:
|
||||
if instance().mu == mu and instance().sigma == sigma:
|
||||
return instance()
|
||||
o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||
newfunc = super(Prior, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
o = newfunc(cls)
|
||||
else:
|
||||
o = newfunc(cls, mu, sigma)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
|
||||
|
|
@ -258,7 +270,11 @@ class Gamma(Prior):
|
|||
for instance in cls._instances:
|
||||
if instance().a == a and instance().b == b:
|
||||
return instance()
|
||||
o = super(Prior, cls).__new__(cls, a, b)
|
||||
newfunc = super(Prior, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
o = newfunc(cls)
|
||||
else:
|
||||
o = newfunc(cls, a, b)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
|
||||
|
|
@ -398,7 +414,7 @@ class DGPLVM_KFDA(Prior):
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in xrange(self.datanum):
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
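compute_cls simply buckets the latent points by the class encoded in each one-hot label row; a standalone sketch of that grouping on toy data (not the DGPLVM class itself) is:

    import numpy as np

    lbl = np.array([[1, 0], [0, 1], [1, 0]])   # one-hot labels for 3 points, 2 classes
    x = np.arange(6.0).reshape(3, 2)           # 3 latent points in 2-D

    def get_class_label(y):
        for idx, v in enumerate(y):
            if v == 1:
                return idx
        return -1

    cls = {}
    for j in range(lbl.shape[0]):              # range() replaces Python 2's xrange()
        cls.setdefault(get_class_label(lbl[j]), []).append(x[j])
    # cls == {0: [array([0., 1.]), array([4., 5.])], 1: [array([2., 3.])]}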
@ -537,7 +553,7 @@ class DGPLVM(Prior):
|
|||
def compute_cls(self, x):
|
||||
cls = {}
|
||||
# Appending each data point to its proper class
|
||||
for j in xrange(self.datanum):
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in cls:
|
||||
cls[class_label] = []
|
||||
|
|
@ -556,7 +572,7 @@ class DGPLVM(Prior):
|
|||
# Adding data points as tuple to the dictionary so that we can access indices
|
||||
def compute_indices(self, x):
|
||||
data_idx = {}
|
||||
for j in xrange(self.datanum):
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in data_idx:
|
||||
data_idx[class_label] = []
|
||||
|
|
@ -575,7 +591,7 @@ class DGPLVM(Prior):
|
|||
else:
|
||||
lst_idx = []
|
||||
# Here we put indices of each class in to the list called lst_idx_all
|
||||
for m in xrange(len(data_idx[i])):
|
||||
for m in range(len(data_idx[i])):
|
||||
lst_idx.append(data_idx[i][m][0])
|
||||
lst_idx_all.append(lst_idx)
|
||||
return lst_idx_all
|
||||
|
|
@ -611,7 +627,7 @@ class DGPLVM(Prior):
|
|||
# pdb.set_trace()
|
||||
# Calculating Bi
|
||||
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
|
||||
for k in xrange(self.datanum):
|
||||
for k in range(self.datanum):
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
if k in lst_idx_all[i]:
|
||||
|
|
@ -712,8 +728,11 @@ class DGPLVM(Prior):
|
|||
return 'DGPLVM_prior_Raq'
|
||||
|
||||
|
||||
# ******************************************
|
||||
|
||||
class DGPLVM_T(Prior):
|
||||
from .. import Parameterized
|
||||
from .. import Param
|
||||
class DGPLVM_Lamda(Prior, Parameterized):
|
||||
"""
|
||||
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
|
||||
|
||||
|
|
@ -734,16 +753,18 @@ class DGPLVM_T(Prior):
|
|||
# cls._instances.append(weakref.ref(o))
|
||||
# return cls._instances[-1]()
|
||||
|
||||
def __init__(self, sigma2, lbl, x_shape, vec):
|
||||
def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
|
||||
super(DGPLVM_Lamda, self).__init__(name=name)
|
||||
self.sigma2 = sigma2
|
||||
# self.x = x
|
||||
self.lbl = lbl
|
||||
self.lamda = lamda
|
||||
self.classnum = lbl.shape[1]
|
||||
self.datanum = lbl.shape[0]
|
||||
self.x_shape = x_shape
|
||||
self.dim = x_shape[1]
|
||||
self.vec = vec
|
||||
|
||||
self.lamda = Param('lamda', np.diag(lamda))
|
||||
self.link_parameter(self.lamda)
|
||||
|
||||
def get_class_label(self, y):
|
||||
for idx, v in enumerate(y):
|
||||
|
|
@ -764,11 +785,11 @@ class DGPLVM_T(Prior):
|
|||
return cls
|
||||
|
||||
# This function computes mean of each class. The mean is calculated through each dimension
|
||||
def compute_Mi(self, cls, vec):
|
||||
def compute_Mi(self, cls):
|
||||
M_i = np.zeros((self.classnum, self.dim))
|
||||
for i in cls:
|
||||
# Mean of each class
|
||||
class_i = np.multiply(cls[i],vec)
|
||||
class_i = cls[i]
|
||||
M_i[i] = np.mean(class_i, axis=0)
|
||||
return M_i
|
||||
|
||||
|
|
@ -822,7 +843,7 @@ class DGPLVM_T(Prior):
|
|||
|
||||
# Calculating beta and Bi for Sb
|
||||
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
|
||||
# import pdb
|
||||
import pdb
|
||||
# pdb.set_trace()
|
||||
B_i = np.zeros((self.classnum, self.dim))
|
||||
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
|
||||
|
|
@ -874,9 +895,256 @@ class DGPLVM_T(Prior):
|
|||
# This function calculates log of our prior
|
||||
def lnpdf(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
|
||||
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
|
||||
|
||||
xprime = x.dot(np.diagflat(self.lamda))
|
||||
x = xprime
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls, self.vec)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
||||
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
|
||||
|
||||
# This function calculates derivative of the log of prior function
|
||||
def lnpdf_grad(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprime = x.dot(np.diagflat(self.lamda))
|
||||
x = xprime
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
data_idx = self.compute_indices(x)
|
||||
lst_idx_all = self.compute_listIndices(data_idx)
|
||||
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
|
||||
W_i = self.compute_wj(data_idx, M_i)
|
||||
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
|
||||
|
||||
# Calculating inverse of Sb and its transpose and minus
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
|
||||
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
|
||||
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
|
||||
Sb_inv_N_trans = np.transpose(Sb_inv_N)
|
||||
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
|
||||
Sw_trans = np.transpose(Sw)
|
||||
|
||||
# Calculating DJ/DXk
|
||||
DJ_Dxk = 2 * (
|
||||
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
|
||||
Sig_alpha_W_i))
|
||||
# Calculating derivative of the log of the prior
|
||||
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
|
||||
|
||||
DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx)
|
||||
|
||||
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
||||
DPxprim_Dx = DPxprim_Dx.T
|
||||
|
||||
DPxprim_Dlamda = DPx_Dx.dot(x)
|
||||
|
||||
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
||||
DPxprim_Dlamda = DPxprim_Dlamda.T
|
||||
|
||||
self.lamda.gradient = np.diag(DPxprim_Dlamda)
|
||||
# print DPxprim_Dx
|
||||
return DPxprim_Dx
|
||||
|
||||
|
||||
# def frb(self, x):
|
||||
# from functools import partial
|
||||
# from GPy.models import GradientChecker
|
||||
# f = partial(self.lnpdf)
|
||||
# df = partial(self.lnpdf_grad)
|
||||
# grad = GradientChecker(f, df, x, 'X')
|
||||
# grad.checkgrad(verbose=1)
|
||||
|
||||
def rvs(self, n):
|
||||
return np.random.rand(n) # A WRONG implementation
|
||||
|
||||
def __str__(self):
|
||||
return 'DGPLVM_prior_Raq_Lamda'
|
||||
|
||||
# ******************************************
|
||||
|
||||
class DGPLVM_T(Prior):
|
||||
"""
|
||||
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
|
||||
|
||||
:param sigma2: constant
|
||||
|
||||
.. Note:: DGPLVM for Classification paper implementation
|
||||
|
||||
"""
|
||||
domain = _REAL
|
||||
# _instances = []
|
||||
# def __new__(cls, mu, sigma): # Singleton:
|
||||
# if cls._instances:
|
||||
# cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
# for instance in cls._instances:
|
||||
# if instance().mu == mu and instance().sigma == sigma:
|
||||
# return instance()
|
||||
# o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||
# cls._instances.append(weakref.ref(o))
|
||||
# return cls._instances[-1]()
|
||||
|
||||
def __init__(self, sigma2, lbl, x_shape, vec):
|
||||
self.sigma2 = sigma2
|
||||
# self.x = x
|
||||
self.lbl = lbl
|
||||
self.classnum = lbl.shape[1]
|
||||
self.datanum = lbl.shape[0]
|
||||
self.x_shape = x_shape
|
||||
self.dim = x_shape[1]
|
||||
self.vec = vec
|
||||
|
||||
|
||||
def get_class_label(self, y):
|
||||
for idx, v in enumerate(y):
|
||||
if v == 1:
|
||||
return idx
|
||||
return -1
|
||||
|
||||
# This function assigns each data point to its own class
|
||||
# and returns the dictionary which contains the class name and parameters.
|
||||
def compute_cls(self, x):
|
||||
cls = {}
|
||||
# Appending each data point to its proper class
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in cls:
|
||||
cls[class_label] = []
|
||||
cls[class_label].append(x[j])
|
||||
return cls
|
||||
|
||||
# This function computes mean of each class. The mean is calculated through each dimension
|
||||
def compute_Mi(self, cls):
|
||||
M_i = np.zeros((self.classnum, self.dim))
|
||||
for i in cls:
|
||||
# Mean of each class
|
||||
# class_i = np.multiply(cls[i],vec)
|
||||
class_i = cls[i]
|
||||
M_i[i] = np.mean(class_i, axis=0)
|
||||
return M_i
|
||||
|
||||
# Adding data points as tuple to the dictionary so that we can access indices
|
||||
def compute_indices(self, x):
|
||||
data_idx = {}
|
||||
for j in range(self.datanum):
|
||||
class_label = self.get_class_label(self.lbl[j])
|
||||
if class_label not in data_idx:
|
||||
data_idx[class_label] = []
|
||||
t = (j, x[j])
|
||||
data_idx[class_label].append(t)
|
||||
return data_idx
|
||||
|
||||
# Adding indices to the list so we can access whole the indices
|
||||
def compute_listIndices(self, data_idx):
|
||||
lst_idx = []
|
||||
lst_idx_all = []
|
||||
for i in data_idx:
|
||||
if len(lst_idx) == 0:
|
||||
pass
|
||||
#Do nothing, because it is the first time list is created so is empty
|
||||
else:
|
||||
lst_idx = []
|
||||
# Here we put indices of each class in to the list called lst_idx_all
|
||||
for m in range(len(data_idx[i])):
|
||||
lst_idx.append(data_idx[i][m][0])
|
||||
lst_idx_all.append(lst_idx)
|
||||
return lst_idx_all
|
||||
|
||||
# This function calculates between classes variances
|
||||
def compute_Sb(self, cls, M_i, M_0):
|
||||
Sb = np.zeros((self.dim, self.dim))
|
||||
for i in cls:
|
||||
B = (M_i[i] - M_0).reshape(self.dim, 1)
|
||||
B_trans = B.transpose()
|
||||
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
|
||||
return Sb
|
||||
|
||||
# This function calculates within classes variances
|
||||
def compute_Sw(self, cls, M_i):
|
||||
Sw = np.zeros((self.dim, self.dim))
|
||||
for i in cls:
|
||||
N_i = float(len(cls[i]))
|
||||
W_WT = np.zeros((self.dim, self.dim))
|
||||
for xk in cls[i]:
|
||||
W = (xk - M_i[i])
|
||||
W_WT += np.outer(W, W)
|
||||
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
|
||||
return Sw
|
||||
|
||||
# Calculating beta and Bi for Sb
|
||||
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
|
||||
# import pdb
|
||||
# pdb.set_trace()
|
||||
B_i = np.zeros((self.classnum, self.dim))
|
||||
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
# pdb.set_trace()
|
||||
# Calculating Bi
|
||||
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
|
||||
for k in range(self.datanum):
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
if k in lst_idx_all[i]:
|
||||
beta = (float(1) / N_i) - (float(1) / self.datanum)
|
||||
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
|
||||
else:
|
||||
beta = -(float(1) / self.datanum)
|
||||
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
|
||||
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
|
||||
return Sig_beta_B_i_all
|
||||
|
||||
|
||||
# Calculating W_j s separately so we can access all the W_j s anytime
|
||||
def compute_wj(self, data_idx, M_i):
|
||||
W_i = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
for tpl in data_idx[i]:
|
||||
xj = tpl[1]
|
||||
j = tpl[0]
|
||||
W_i[j] = (xj - M_i[i])
|
||||
return W_i
|
||||
|
||||
# Calculating alpha and Wj for Sw
|
||||
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
|
||||
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
|
||||
for i in data_idx:
|
||||
N_i = float(len(data_idx[i]))
|
||||
for tpl in data_idx[i]:
|
||||
k = tpl[0]
|
||||
for j in lst_idx_all[i]:
|
||||
if k == j:
|
||||
alpha = 1 - (float(1) / N_i)
|
||||
Sig_alpha_W_i[k] += (alpha * W_i[j])
|
||||
else:
|
||||
alpha = 0 - (float(1) / N_i)
|
||||
Sig_alpha_W_i[k] += (alpha * W_i[j])
|
||||
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
|
||||
return Sig_alpha_W_i
|
||||
|
||||
# This function calculates log of our prior
|
||||
def lnpdf(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprim = x.dot(self.vec)
|
||||
x = xprim
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
|
||||
|
|
@ -889,9 +1157,12 @@ class DGPLVM_T(Prior):
|
|||
# This function calculates derivative of the log of prior function
|
||||
def lnpdf_grad(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprim = x.dot(self.vec)
|
||||
x = xprim
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls, self.vec)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
Sw = self.compute_Sw(cls, M_i)
|
||||
data_idx = self.compute_indices(x)
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from parameterized import Parameterized
|
||||
from param import Param
|
||||
from .parameterized import Parameterized
|
||||
from .param import Param
|
||||
|
||||
class Remapping(Parameterized):
|
||||
def mapping(self):
|
||||
|
|
@ -98,7 +98,7 @@ class Tie(Parameterized):
|
|||
if np.all(self.label_buf[idx]==0):
|
||||
# None of p has been tied before.
|
||||
tie_idx = self._expandTieParam(1)
|
||||
print tie_idx
|
||||
print(tie_idx)
|
||||
tie_id = self.label_buf.max()+1
|
||||
self.label_buf[tie_idx] = tie_id
|
||||
else:
|
||||
|
|
@ -185,18 +185,18 @@ class Tie(Parameterized):
|
|||
def _check_change(self):
|
||||
changed = False
|
||||
if self.tied_param is not None:
|
||||
for i in xrange(self.tied_param.size):
|
||||
for i in range(self.tied_param.size):
|
||||
b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
|
||||
b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
|
||||
if b.sum()==0:
|
||||
print 'XXX'
|
||||
print('XXX')
|
||||
continue
|
||||
elif b.sum()==1:
|
||||
print '!!!'
|
||||
print('!!!')
|
||||
val = self._highest_parent_.param_array[b0][b][0]
|
||||
self._highest_parent_.param_array[b0] = val
|
||||
else:
|
||||
print '@@@'
|
||||
print('@@@')
|
||||
self._highest_parent_.param_array[b0] = self.tied_param[i]
|
||||
changed = True
|
||||
return changed
|
||||
|
|
@ -212,11 +212,11 @@ class Tie(Parameterized):
|
|||
if self.tied_param is not None:
|
||||
self.tied_param.gradient = 0.
|
||||
[np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum())
|
||||
for i in xrange(self.tied_param.size)]
|
||||
for i in range(self.tied_param.size)]
|
||||
|
||||
def propagate_val(self):
|
||||
if self.tied_param is not None:
|
||||
for i in xrange(self.tied_param.size):
|
||||
for i in range(self.tied_param.size):
|
||||
self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
|
||||
import numpy as np
|
||||
from domains import _POSITIVE,_NEGATIVE, _BOUNDED
|
||||
from .domains import _POSITIVE,_NEGATIVE, _BOUNDED
|
||||
import weakref
|
||||
|
||||
import sys
|
||||
|
|
@ -72,7 +72,7 @@ class Logexp(Transformation):
|
|||
return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f)))
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
|
@ -130,7 +130,7 @@ class NormalTheta(Transformation):
|
|||
|
||||
def initialize(self, f):
|
||||
if np.any(f[self.var_indices] < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||
return f
|
||||
|
||||
|
|
@ -177,7 +177,7 @@ class NormalNaturalAntti(NormalTheta):
|
|||
|
||||
def initialize(self, f):
|
||||
if np.any(f[self.var_indices] < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||
return f
|
||||
|
||||
|
|
@ -220,7 +220,7 @@ class NormalEta(Transformation):
|
|||
|
||||
def initialize(self, f):
|
||||
if np.any(f[self.var_indices] < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
f[self.var_indices] = np.abs(f[self.var_indices])
|
||||
return f
|
||||
|
||||
|
|
@ -360,7 +360,7 @@ class LogexpNeg(Transformation):
|
|||
return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f)))
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
|
@ -412,7 +412,7 @@ class LogexpClipped(Logexp):
|
|||
return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf)
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def __str__(self):
|
||||
return '+ve_c'
|
||||
|
|
@ -428,7 +428,7 @@ class Exponent(Transformation):
|
|||
return np.einsum('i,i->i', df, f)
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
|
@ -468,7 +468,11 @@ class Logistic(Transformation):
|
|||
for instance in cls._instances:
|
||||
if instance().lower == lower and instance().upper == upper:
|
||||
return instance()
|
||||
o = super(Transformation, cls).__new__(cls, lower, upper, *args, **kwargs)
|
||||
newfunc = super(Transformation, cls).__new__
|
||||
if newfunc is object.__new__:
|
||||
o = newfunc(cls)
|
||||
else:
|
||||
o = newfunc(cls, lower, upper, *args, **kwargs)
|
||||
cls._instances.append(weakref.ref(o))
|
||||
return cls._instances[-1]()
|
||||
def __init__(self, lower, upper):
|
||||
|
|
@ -486,7 +490,7 @@ class Logistic(Transformation):
|
|||
return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference)
|
||||
def initialize(self, f):
|
||||
if np.any(np.logical_or(f < self.lower, f > self.upper)):
|
||||
print "Warning: changing parameters to satisfy constraints"
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
#return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
|
||||
#FIXME: Max, zeros_like right?
|
||||
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ Created on 11 Nov 2014
|
|||
|
||||
@author: maxz
|
||||
'''
|
||||
from observable import Observable
|
||||
from .observable import Observable
|
||||
|
||||
|
||||
class Updateable(Observable):
|
||||
|
|
@ -35,7 +35,7 @@ class Updateable(Observable):
|
|||
self.trigger_update()
|
||||
|
||||
def toggle_update(self):
|
||||
print "deprecated: toggle_update was renamed to update_toggle for easier access"
|
||||
print("deprecated: toggle_update was renamed to update_toggle for easier access")
|
||||
self.update_toggle()
|
||||
def update_toggle(self):
|
||||
self.update_model(not self.update_model())
|
||||
|
|
|
|||
|
|
@ -5,9 +5,9 @@ Created on 6 Nov 2013
|
|||
'''
|
||||
|
||||
import numpy as np
|
||||
from parameterized import Parameterized
|
||||
from param import Param
|
||||
from transformations import Logexp, Logistic,__fixed__
|
||||
from .parameterized import Parameterized
|
||||
from .param import Param
|
||||
from .transformations import Logexp, Logistic,__fixed__
|
||||
from GPy.util.misc import param_to_array
|
||||
from GPy.util.caching import Cache_this
|
||||
|
||||
|
|
@ -16,13 +16,13 @@ class VariationalPrior(Parameterized):
|
|||
super(VariationalPrior, self).__init__(name=name, **kw)
|
||||
|
||||
def KL_divergence(self, variational_posterior):
|
||||
raise NotImplementedError, "override this for variational inference of latent space"
|
||||
raise NotImplementedError("override this for variational inference of latent space")
|
||||
|
||||
def update_gradients_KL(self, variational_posterior):
|
||||
"""
|
||||
updates the gradients for mean and variance **in place**
|
||||
"""
|
||||
raise NotImplementedError, "override this for variational inference of latent space"
|
||||
raise NotImplementedError("override this for variational inference of latent space")
|
||||
|
||||
class NormalPrior(VariationalPrior):
|
||||
def KL_divergence(self, variational_posterior):
|
||||
|
|
@ -50,31 +50,29 @@ class SpikeAndSlabPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum()
var_gamma = (gamma*np.log(gamma/pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-pi))).sum()
return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.

def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma = variational_posterior.gamma.values
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi:
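For reference, the new var_gamma line is the Bernoulli part of the textbook spike-and-slab KL term, with posterior inclusion probability \gamma and prior inclusion probability \pi; together with the Gaussian slab term it reads (summed over latent dimensions, \sigma^2 being the prior slab variance self.variance, \mu and S the posterior mean and variance):

    \mathrm{KL} \;=\; \sum \Big[\; \gamma \log\tfrac{\gamma}{\pi} \;+\; (1-\gamma)\log\tfrac{1-\gamma}{1-\pi}
        \;+\; \tfrac{\gamma}{2}\Big( \log\sigma^{2} \;-\; 1 \;+\; \tfrac{\mu^{2} + S}{\sigma^{2}} \;-\; \log S \Big) \Big]

which is exactly what the returned expression evaluates.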
@ -141,7 +139,7 @@ class NormalPosterior(VariationalPosterior):
|
|||
holds the means and variances for a factorizing multivariate normal distribution
|
||||
'''
|
||||
|
||||
def plot(self, *args):
|
||||
def plot(self, *args, **kwargs):
|
||||
"""
|
||||
Plot latent space X in 1D:
|
||||
|
||||
|
|
@ -150,8 +148,7 @@ class NormalPosterior(VariationalPosterior):
|
|||
import sys
|
||||
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
|
||||
from ...plotting.matplot_dep import variational_plots
|
||||
import matplotlib
|
||||
return variational_plots.plot(self,*args)
|
||||
return variational_plots.plot(self, *args, **kwargs)
|
||||
|
||||
class SpikeAndSlabPosterior(VariationalPosterior):
|
||||
'''
|
||||
|
|
@ -162,25 +159,9 @@ class SpikeAndSlabPosterior(VariationalPosterior):
|
|||
binary_prob : the probability of the distribution on the slab part.
|
||||
"""
|
||||
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
|
||||
self.gamma = Param("binary_prob",binary_prob)
|
||||
self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
|
||||
self.link_parameter(self.gamma)
|
||||
|
||||
@Cache_this(limit=5)
|
||||
def gamma_probabilities(self):
|
||||
prob = np.zeros_like(param_to_array(self.gamma))
|
||||
prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
|
||||
prob1 = -np.zeros_like(param_to_array(self.gamma))
|
||||
prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
|
||||
return prob, prob1
|
||||
|
||||
@Cache_this(limit=5)
|
||||
def gamma_log_prob(self):
|
||||
loggamma = param_to_array(self.gamma).copy()
|
||||
loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
|
||||
loggamma1 = -param_to_array(self.gamma).copy()
|
||||
loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40]))
|
||||
return loggamma,loggamma1
|
||||
|
||||
def set_gradients(self, grad):
|
||||
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
|
||||
|
||||
|
|
|
|||
|
|
@ -2,19 +2,15 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from gp import GP
|
||||
from parameterization.param import Param
|
||||
from .gp import GP
|
||||
from .parameterization.param import Param
|
||||
from ..inference.latent_function_inference import var_dtc
|
||||
from .. import likelihoods
|
||||
from parameterization.variational import VariationalPosterior, NormalPosterior
|
||||
from .parameterization.variational import VariationalPosterior, NormalPosterior
|
||||
from ..util.linalg import mdot
|
||||
|
||||
import logging
|
||||
from GPy.inference.latent_function_inference.posterior import Posterior
|
||||
from GPy.inference.optimization.stochastics import SparseGPStochastics,\
|
||||
SparseGPMissing
|
||||
#no stochastics.py file added! from GPy.inference.optimization.stochastics import SparseGPStochastics,\
|
||||
#SparseGPMissing
|
||||
import itertools
|
||||
logger = logging.getLogger("sparse gp")
|
||||
|
||||
class SparseGP(GP):
|
||||
|
|
@ -25,6 +21,10 @@ class SparseGP(GP):
|
|||
(Gaussian likelihoods) as well as non-conjugate sparse methods based on
|
||||
these.
|
||||
|
||||
This is not for missing data, as the implementation for missing data involves
|
||||
some inefficient optimization routine decisions.
|
||||
See missing data SparseGP implementation in py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'.
|
||||
|
||||
:param X: inputs
|
||||
:type X: np.ndarray (num_data x input_dim)
|
||||
:param likelihood: a likelihood instance, containing the observed data
|
||||
|
|
@ -40,7 +40,7 @@ class SparseGP(GP):
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, inference_method=None,
|
||||
name='sparse gp', Y_metadata=None, normalizer=False):
|
||||
#pick a sensible inference method
|
||||
if inference_method is None:
|
||||
|
|
@ -48,13 +48,13 @@ class SparseGP(GP):
|
|||
inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
|
||||
else:
|
||||
#inference_method = ??
|
||||
raise NotImplementedError, "what to do what to do?"
|
||||
print "defaulting to ", inference_method, "for latent function inference"
|
||||
raise NotImplementedError("what to do what to do?")
|
||||
print("defaulting to ", inference_method, "for latent function inference")
|
||||
|
||||
self.Z = Param('inducing inputs', Z)
|
||||
self.num_inducing = Z.shape[0]
|
||||
|
||||
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
|
||||
GP.__init__(self, X, Y, kernel, likelihood, mean_function, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
|
||||
|
||||
logger.info("Adding Z as parameter")
|
||||
self.link_parameter(self.Z, index=0)
|
||||
|
|
@ -63,6 +63,14 @@ class SparseGP(GP):
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
def set_Z(self, Z, trigger_update=True):
if trigger_update: self.update_model(False)
self.unlink_parameter(self.Z)
self.Z = Param('inducing inputs',Z)
self.link_parameter(self.Z, index=0)
if trigger_update: self.update_model(True)
if trigger_update: self._trigger_params_changed()
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
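The new set_Z helper swaps the inducing inputs in place and triggers re-inference. A small usage sketch with made-up data, assuming a SparseGPRegression model built against this branch (which inherits the new method), could be:

    import numpy as np
    import GPy

    X = np.random.rand(100, 1)
    Y = np.sin(X) + 0.05 * np.random.randn(100, 1)
    m = GPy.models.SparseGPRegression(X, Y, num_inducing=10)

    Z_new = np.linspace(0, 1, 20)[:, None]   # replace the 10 inducing points with 20
    m.set_Z(Z_new)                           # relinks the 'inducing inputs' Param and re-runs inference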
@ -111,7 +119,7 @@ class SparseGP(GP):
|
|||
|
||||
For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
|
||||
return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).
|
||||
This is for both with and without missing data.
|
||||
This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'.
|
||||
"""
|
||||
|
||||
if kern is None: kern = self.kern
|
||||
|
|
@ -124,15 +132,26 @@ class SparseGP(GP):
if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var
else:
Kxx = kern.Kdiag(Xnew)
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
if self.posterior.woodbury_inv.ndim == 2:
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
for i in range(var.shape[1]):
var[:, i] = (Kxx - (np.sum(np.dot(self.posterior.woodbury_inv[:, :, i].T, Kx) * Kx, 0)))
var = var
#add in the mean function
if self.mean_function is not None:
mu += self.mean_function.f(Xnew)
else:
psi0_star = self.kern.psi0(self.Z, Xnew)
psi1_star = self.kern.psi1(self.Z, Xnew)
#psi2_star = self.kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
psi0_star = kern.psi0(self.Z, Xnew)
psi1_star = kern.psi1(self.Z, Xnew)
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
la = self.posterior.woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions?
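The per-output loop that replaces the tensordot here just stacks the usual posterior covariance Kxx - Kx^T W_i Kx over outputs. A minimal NumPy sketch with made-up shapes (N test points, M inducing points, P outputs; W standing in for posterior.woodbury_inv) shows the two forms agree:

    import numpy as np

    N, M, P = 5, 3, 2
    Kxx = np.eye(N)                  # stands in for kern.K(Xnew), shape (N, N)
    Kx = np.random.rand(M, N)        # stands in for kern.K(Z, Xnew), shape (M, N)
    W = np.random.rand(M, M, P)      # per-output Woodbury inverse, shape (M, M, P)

    # explicit loop over outputs, as in the new code
    var_loop = np.empty((N, N, P))
    for i in range(P):
        var_loop[:, :, i] = Kxx - Kx.T.dot(W[:, :, i]).dot(Kx)

    # equivalent single einsum producing the (N, N, P) array at once
    var_einsum = Kxx[:, :, None] - np.einsum('mn,mkp,kl->nlp', Kx, W, Kx)
    assert np.allclose(var_loop, var_einsum)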
@ -144,7 +163,7 @@ class SparseGP(GP):
|
|||
|
||||
for i in range(Xnew.shape[0]):
|
||||
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
|
||||
psi2_star = self.kern.psi2(self.Z, NormalPosterior(_mu, _var))
|
||||
psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
|
||||
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
|
||||
|
||||
var_ = mdot(la.T, tmp, la)
|
||||
|
|
@ -158,4 +177,5 @@ class SparseGP(GP):
|
|||
var[i] = var_
|
||||
else:
|
||||
var[i] = np.diag(var_)+p0-t2
|
||||
|
||||
return mu, var
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from sparse_gp import SparseGP
|
||||
from .sparse_gp import SparseGP
|
||||
from numpy.linalg.linalg import LinAlgError
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ class SparseGP_MPI(SparseGP):
|
|||
self.N_range = (N_start, N_end)
|
||||
self.N_list = np.array(N_list)
|
||||
self.Y_local = self.Y[N_start:N_end]
|
||||
print 'MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range)
|
||||
print('MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range))
|
||||
mpi_comm.Bcast(self.param_array, root=0)
|
||||
self.update_model(True)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
|
||||
import numpy as np
|
||||
from ..util import choleskies
|
||||
from sparse_gp import SparseGP
|
||||
from parameterization.param import Param
|
||||
from .sparse_gp import SparseGP
|
||||
from .parameterization.param import Param
|
||||
from ..inference.latent_function_inference import SVGP as svgp_inf
|
||||
|
||||
|
||||
class SVGP(SparseGP):
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, name='SVGP', Y_metadata=None, batchsize=None):
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None, num_latent_functions=None):
|
||||
"""
|
||||
Stochastic Variational GP.
|
||||
|
||||
|
|
@ -38,33 +38,45 @@ class SVGP(SparseGP):
|
|||
#create the SVI inference method
|
||||
inf_method = svgp_inf()
|
||||
|
||||
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, inference_method=inf_method,
|
||||
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method,
|
||||
name=name, Y_metadata=Y_metadata, normalizer=False)
|
||||
|
||||
self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1])))
|
||||
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,Y.shape[1])))
|
||||
#assume the number of latent functions is one per col of Y unless specified
|
||||
if num_latent_functions is None:
|
||||
num_latent_functions = Y.shape[1]
|
||||
|
||||
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
|
||||
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,num_latent_functions)))
|
||||
self.chol = Param('q_u_chol', chol)
|
||||
self.link_parameter(self.chol)
|
||||
self.link_parameter(self.m)
|
||||
|
||||
def parameters_changed(self):
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
|
||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.q_u_mean, self.q_u_chol, self.kern, self.X, self.Z, self.likelihood, self.Y, self.mean_function, self.Y_metadata, KL_scale=1.0, batch_scale=float(self.X_all.shape[0])/float(self.X.shape[0]))
|
||||
|
||||
#update the kernel gradients
|
||||
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z)
|
||||
grad = self.kern.gradient.copy()
|
||||
self.kern.update_gradients_full(self.grad_dict['dL_dKmn'], self.Z, self.X)
|
||||
grad += self.kern.gradient
|
||||
grad += self.kern.gradient.copy()
|
||||
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
|
||||
self.kern.gradient += grad
|
||||
if not self.Z.is_fixed:# only compute these expensive gradients if we need them
|
||||
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z) + self.kern.gradients_X(self.grad_dict['dL_dKmn'], self.Z, self.X)
|
||||
|
||||
|
||||
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
|
||||
#update the variational parameter gradients:
|
||||
self.m.gradient = self.grad_dict['dL_dm']
|
||||
self.chol.gradient = self.grad_dict['dL_dchol']
|
||||
|
||||
if self.mean_function is not None:
|
||||
self.mean_function.update_gradients(self.grad_dict['dL_dmfX'], self.X)
|
||||
g = self.mean_function.gradient[:].copy()
|
||||
self.mean_function.update_gradients(self.grad_dict['dL_dmfZ'], self.Z)
|
||||
self.mean_function.gradient[:] += g
|
||||
self.Z.gradient[:] += self.mean_function.gradients_X(self.grad_dict['dL_dmfZ'], self.Z)
|
||||
|
||||
def set_data(self, X, Y):
|
||||
"""
|
||||
Set the data without calling parameters_changed to avoid wasted computation
|
||||
|
|
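The SVGP hunk above stores the variational covariance factor as a flat vector per latent function via choleskies.triang_to_flat. A pure-NumPy sketch of the underlying idea, packing a lower-triangular Cholesky factor into a vector and recovering the implied covariance (the exact packing order GPy uses may differ), is:

    import numpy as np

    M = 3                                           # number of inducing points
    L = np.tril(np.random.rand(M, M)) + np.eye(M)   # a lower-triangular Cholesky factor

    rows, cols = np.tril_indices(M)
    flat = L[rows, cols]                            # pack the M*(M+1)/2 free entries

    L_back = np.zeros((M, M))
    L_back[rows, cols] = flat                       # unpack
    S = L_back.dot(L_back.T)                        # q(u) covariance implied by the factor
    assert np.allclose(L, L_back)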
|
|||
|
|
@ -223,7 +223,7 @@ class Symbolic_core():
|
|||
|
||||
def code_gradients_cacheable(self, function, variable):
|
||||
if variable not in self.cacheable:
|
||||
raise RuntimeError, variable + ' must be a cacheable.'
|
||||
raise RuntimeError(variable + ' must be a cacheable.')
|
||||
lcode = 'gradients_' + variable + ' = np.zeros_like(' + variable + ')\n'
|
||||
lcode += 'self.update_cache(' + ', '.join(self.cacheable) + ')\n'
|
||||
for i, theta in enumerate(self.variables[variable]):
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Copyright (c) 2012-2014, Max Zwiessele.
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
import sys
|
||||
import time
|
||||
|
|
@ -11,7 +11,7 @@ def exponents(fnow, current_grad):
|
|||
return np.sign(exps) * np.log10(exps).astype(int)
|
||||
|
||||
class VerboseOptimization(object):
|
||||
def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True):
|
||||
def __init__(self, model, opt, maxiters, verbose=False, current_iteration=0, ipython_notebook=True, clear_after_finish=False):
|
||||
self.verbose = verbose
|
||||
if self.verbose:
|
||||
self.model = model
|
||||
|
|
@ -22,30 +22,31 @@ class VerboseOptimization(object):
|
|||
self.opt_name = opt.opt_name
|
||||
self.model.add_observer(self, self.print_status)
|
||||
self.status = 'running'
|
||||
self.clear = clear_after_finish
|
||||
|
||||
self.update()
|
||||
|
||||
try:
|
||||
from IPython.display import display
|
||||
from IPython.html.widgets import FloatProgressWidget, HTMLWidget, ContainerWidget
|
||||
self.text = HTMLWidget()
|
||||
self.progress = FloatProgressWidget()
|
||||
self.model_show = HTMLWidget()
|
||||
from IPython.html.widgets import IntProgress, HTML, Box, VBox, HBox, FlexBox
|
||||
self.text = HTML(width='100%')
|
||||
self.progress = IntProgress(min=0, max=maxiters)
|
||||
#self.progresstext = Text(width='100%', disabled=True, value='0/{}'.format(maxiters))
|
||||
self.model_show = HTML()
|
||||
self.ipython_notebook = ipython_notebook
|
||||
except:
|
||||
# Not in Ipython notebook
|
||||
self.ipython_notebook = False
|
||||
|
||||
if self.ipython_notebook:
|
||||
left_col = VBox(children=[self.progress, self.text], padding=2, width='40%')
|
||||
right_col = Box(children=[self.model_show], padding=2, width='60%')
|
||||
self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
|
||||
|
||||
display(self.hor_align)
|
||||
|
||||
try:
|
||||
self.text.set_css('width', '100%')
|
||||
#self.progress.set_css('width', '100%')
|
||||
|
||||
left_col = ContainerWidget(children = [self.progress, self.text])
|
||||
right_col = ContainerWidget(children = [self.model_show])
|
||||
hor_align = ContainerWidget(children = [left_col, right_col])
|
||||
|
||||
display(hor_align)
|
||||
|
||||
left_col.set_css({
|
||||
'padding': '2px',
|
||||
'width': "100%",
|
||||
|
|
@ -55,22 +56,25 @@ class VerboseOptimization(object):
|
|||
'padding': '2px',
|
||||
})
|
||||
|
||||
hor_align.set_css({
|
||||
self.hor_align.set_css({
|
||||
'width': "100%",
|
||||
})
|
||||
|
||||
hor_align.remove_class('vbox')
|
||||
hor_align.add_class('hbox')
|
||||
self.hor_align.remove_class('vbox')
|
||||
self.hor_align.add_class('hbox')
|
||||
|
||||
left_col.add_class("box-flex1")
|
||||
right_col.add_class('box-flex0')
|
||||
|
||||
except:
|
||||
pass
|
||||
|
||||
#self.text.add_class('box-flex2')
|
||||
#self.progress.add_class('box-flex1')
|
||||
else:
|
||||
self.exps = exponents(self.fnow, self.current_gradient)
|
||||
print 'Running {} Code:'.format(self.opt_name)
|
||||
print ' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters)
|
||||
print('Running {} Code:'.format(self.opt_name))
|
||||
print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters))
|
||||
|
||||
def __enter__(self):
|
||||
self.start = time.time()
|
||||
|
|
@ -102,7 +106,8 @@ class VerboseOptimization(object):
|
|||
html_body += "<td class='tg-right'>{}</td>".format(val)
|
||||
html_body += "</tr>"
|
||||
self.text.value = html_begin + html_body + html_end
|
||||
self.progress.value = 100*(self.iteration+1)/self.maxiters
|
||||
self.progress.value = (self.iteration+1)
|
||||
#self.progresstext.value = '0/{}'.format((self.iteration+1))
|
||||
self.model_show.value = self.model._repr_html_()
|
||||
else:
|
||||
n_exps = exponents(self.fnow, self.current_gradient)
|
||||
|
|
@ -111,11 +116,11 @@ class VerboseOptimization(object):
|
|||
b = np.any(n_exps < self.exps)
|
||||
if a or b:
|
||||
self.p_iter = self.iteration
|
||||
print ''
|
||||
print('')
|
||||
if b:
|
||||
self.exps = n_exps
|
||||
print '\r',
|
||||
print '{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
|
||||
print('\r', end=' ')
|
||||
print('{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
|
||||
sys.stdout.flush()
|
||||
|
||||
def print_status(self, me, which=None):
|
||||
|
|
@ -136,6 +141,13 @@ class VerboseOptimization(object):
|
|||
|
||||
def finish(self, opt):
|
||||
self.status = opt.status
|
||||
if self.verbose and self.ipython_notebook:
|
||||
if 'conv' in self.status.lower():
|
||||
self.progress.bar_style = 'success'
|
||||
elif self.iteration >= self.maxiters:
|
||||
self.progress.bar_style = 'warning'
|
||||
else:
|
||||
self.progress.bar_style = 'danger'
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
if self.verbose:
|
||||
|
|
@ -144,7 +156,9 @@ class VerboseOptimization(object):
|
|||
self.print_out()
|
||||
|
||||
if not self.ipython_notebook:
|
||||
print ''
|
||||
print 'Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start)
|
||||
print 'Optimization status: {0:.5g}'.format(self.status)
|
||||
print
|
||||
print()
|
||||
print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start))
|
||||
print('Optimization status: {0}'.format(self.status))
|
||||
print()
|
||||
elif self.clear:
|
||||
self.hor_align.close()
|
||||
|
|
|
|||
|
|
@ -25,3 +25,6 @@ MKL = False
[weave]
#if true, try to use weave, and fall back to numpy. if false, just use numpy.
working = True

[cython]
working = True
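These INI-style sections ([weave], [cython]) can be read with the standard-library parser; a minimal sketch assuming the snippet is saved as a file named installation.cfg (the real file name and loading path inside GPy may differ) is:

    try:
        from configparser import ConfigParser   # Python 3
    except ImportError:
        from ConfigParser import ConfigParser   # Python 2

    cfg = ConfigParser()
    cfg.read('installation.cfg')                 # hypothetical file holding the sections above
    use_weave = cfg.getboolean('weave', 'working')
    use_cython = cfg.getboolean('cython', 'working')
    print(use_weave, use_cython)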
@ -1,7 +1,7 @@
|
|||
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import classification
|
||||
import regression
|
||||
import dimensionality_reduction
|
||||
import non_gaussian
|
||||
from . import classification
|
||||
from . import regression
|
||||
from . import dimensionality_reduction
|
||||
from . import non_gaussian
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
|
|||
|
||||
"""
|
||||
try:import pods
|
||||
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
data = pods.datasets.oil()
|
||||
X = data['X']
|
||||
Xtest = data['Xtest']
|
||||
|
|
@ -52,7 +52,7 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
|
|||
"""
|
||||
|
||||
try:import pods
|
||||
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
data = pods.datasets.toy_linear_1d_classification(seed=seed)
|
||||
Y = data['Y'][:, 0:1]
|
||||
Y[Y.flatten() == -1] = 0
|
||||
|
|
@ -75,7 +75,7 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
|
|||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
|
||||
print m
|
||||
print(m)
|
||||
return m
|
||||
|
||||
def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=True):
|
||||
|
|
@ -88,7 +88,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
|
|||
"""
|
||||
|
||||
try:import pods
|
||||
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
data = pods.datasets.toy_linear_1d_classification(seed=seed)
|
||||
Y = data['Y'][:, 0:1]
|
||||
Y[Y.flatten() == -1] = 0
|
||||
|
|
@ -114,7 +114,7 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
|
|||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
|
||||
print m
|
||||
print(m)
|
||||
return m
|
||||
|
||||
def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, optimize=True, plot=True):
|
||||
|
|
@ -127,7 +127,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
|
|||
"""
|
||||
|
||||
try:import pods
|
||||
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
data = pods.datasets.toy_linear_1d_classification(seed=seed)
|
||||
Y = data['Y'][:, 0:1]
|
||||
Y[Y.flatten() == -1] = 0
|
||||
|
|
@ -147,7 +147,7 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
|
|||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
|
||||
print m
|
||||
print(m)
|
||||
return m
|
||||
|
||||
def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
|
||||
|
|
@ -160,7 +160,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
|
|||
"""
|
||||
|
||||
try:import pods
|
||||
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
data = pods.datasets.toy_linear_1d_classification(seed=seed)
|
||||
Y = data['Y'][:, 0:1]
|
||||
Y[Y.flatten() == -1] = 0
|
||||
|
|
@ -177,7 +177,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
|
|||
# Parameters optimization:
|
||||
for _ in range(5):
|
||||
m.optimize(max_iters=int(max_iters/5))
|
||||
print m
|
||||
print(m)
|
||||
|
||||
# Plot
|
||||
if plot:
|
||||
|
|
@ -186,7 +186,7 @@ def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
|
|||
m.plot_f(ax=axes[0])
|
||||
m.plot(ax=axes[1])
|
||||
|
||||
print m
|
||||
print(m)
|
||||
return m
|
||||
|
||||
def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=None, optimize=True, plot=True):
|
||||
|
|
@ -202,7 +202,7 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
|
|||
:type kernel: a GPy kernel
|
||||
"""
|
||||
try:import pods
|
||||
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
data = pods.datasets.crescent_data(seed=seed)
|
||||
Y = data['Y']
|
||||
Y[Y.flatten()==-1] = 0
|
||||
|
|
@ -224,5 +224,5 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
|
|||
if plot:
|
||||
m.plot()
|
||||
|
||||
print m
|
||||
print(m)
|
||||
return m
|
||||
|
|
|
|||
|
|
@ -335,7 +335,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
|
|||
m.likelihood.variance = .1
|
||||
|
||||
if optimize:
|
||||
print "Optimizing model:"
|
||||
print("Optimizing model:")
|
||||
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
|
||||
gtol=.05)
|
||||
if plot:
|
||||
|
|
@ -360,7 +360,7 @@ def ssgplvm_simulation(optimize=True, verbose=1,
|
|||
m.likelihood.variance = .1
|
||||
|
||||
if optimize:
|
||||
print "Optimizing model:"
|
||||
print("Optimizing model:")
|
||||
m.optimize('scg', messages=verbose, max_iters=max_iters,
|
||||
gtol=.05)
|
||||
if plot:
|
||||
|
|
@ -390,7 +390,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
|||
m.Yreal = Y
|
||||
|
||||
if optimize:
|
||||
print "Optimizing model:"
|
||||
print("Optimizing model:")
|
||||
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
|
||||
gtol=.05)
|
||||
if plot:
|
||||
|
|
@ -414,7 +414,7 @@ def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
|
|||
m['.*noise'] = [Y.var() / 40. for Y in Ylist]
|
||||
|
||||
if optimize:
|
||||
print "Optimizing Model:"
|
||||
print("Optimizing Model:")
|
||||
m.optimize(messages=verbose, max_iters=8e3)
|
||||
if plot:
|
||||
m.X.plot("MRD Latent Space 1D")
|
||||
|
|
@ -442,7 +442,7 @@ def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim
|
|||
initx="random", initz='permute', **kw)
|
||||
|
||||
if optimize:
|
||||
print "Optimizing Model:"
|
||||
print("Optimizing Model:")
|
||||
m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
|
||||
if plot:
|
||||
m.X.plot("MRD Latent Space 1D")
|
||||
|
|
@ -607,7 +607,7 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
|
|||
try:
|
||||
if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10)
|
||||
except KeyboardInterrupt:
|
||||
print "Keyboard interrupt, continuing to plot and return"
|
||||
print("Keyboard interrupt, continuing to plot and return")
|
||||
|
||||
if plot:
|
||||
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
|
||||
|
|
@ -658,7 +658,7 @@ def ssgplvm_simulation_linear():
|
|||
def sample_X(Q, pi):
|
||||
x = np.empty(Q)
|
||||
dies = np.random.rand(Q)
|
||||
for q in xrange(Q):
|
||||
for q in range(Q):
|
||||
if dies[q] < pi:
|
||||
x[q] = np.random.randn()
|
||||
else:
|
||||
|
|
@ -668,7 +668,7 @@ def ssgplvm_simulation_linear():
|
|||
Y = np.empty((N, D))
|
||||
X = np.empty((N, Q))
|
||||
# Generate data from random sampled weight matrices
|
||||
for n in xrange(N):
|
||||
for n in range(N):
|
||||
X[n] = sample_X(Q, pi)
|
||||
w = np.random.randn(D, Q)
|
||||
Y[n] = np.dot(w, X[n])
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ def student_t_approx(optimize=True, plot=True):
|
|||
|
||||
#Add student t random noise to datapoints
|
||||
deg_free = 1
|
||||
print "Real noise: ", real_std
|
||||
print("Real noise: ", real_std)
|
||||
initial_var_guess = 0.5
|
||||
edited_real_sd = initial_var_guess
|
||||
|
||||
|
|
@ -73,7 +73,7 @@ def student_t_approx(optimize=True, plot=True):
|
|||
m4['.*t_scale2'].constrain_bounded(1e-6, 10.)
|
||||
m4['.*white'].constrain_fixed(1e-5)
|
||||
m4.randomize()
|
||||
print m4
|
||||
print(m4)
|
||||
debug=True
|
||||
if debug:
|
||||
m4.optimize(messages=1)
|
||||
|
|
@ -81,18 +81,18 @@ def student_t_approx(optimize=True, plot=True):
|
|||
pb.plot(m4.X, m4.inference_method.f_hat)
|
||||
pb.plot(m4.X, m4.Y, 'rx')
|
||||
m4.plot()
|
||||
print m4
|
||||
print(m4)
|
||||
return m4
|
||||
|
||||
if optimize:
|
||||
optimizer='scg'
|
||||
print "Clean Gaussian"
|
||||
print("Clean Gaussian")
|
||||
m1.optimize(optimizer, messages=1)
|
||||
print "Corrupt Gaussian"
|
||||
print("Corrupt Gaussian")
|
||||
m2.optimize(optimizer, messages=1)
|
||||
print "Clean student t"
|
||||
print("Clean student t")
|
||||
m3.optimize(optimizer, messages=1)
|
||||
print "Corrupt student t"
|
||||
print("Corrupt student t")
|
||||
m4.optimize(optimizer, messages=1)
|
||||
|
||||
if plot:
|
||||
|
|
@ -151,7 +151,7 @@ def boston_example(optimize=True, plot=True):
|
|||
|
||||
for n, (train, test) in enumerate(kf):
|
||||
X_train, X_test, Y_train, Y_test = X[train], X[test], Y[train], Y[test]
|
||||
print "Fold {}".format(n)
|
||||
print("Fold {}".format(n))
|
||||
|
||||
noise = 1e-1 #np.exp(-2)
|
||||
rbf_len = 0.5
|
||||
|
|
@ -163,21 +163,21 @@ def boston_example(optimize=True, plot=True):
|
|||
score_folds[0, n] = rmse(Y_test, np.mean(Y_train))
|
||||
|
||||
#Gaussian GP
|
||||
print "Gauss GP"
|
||||
print("Gauss GP")
|
||||
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
|
||||
mgp.constrain_fixed('.*white', 1e-5)
|
||||
mgp['.*len'] = rbf_len
|
||||
mgp['.*noise'] = noise
|
||||
print mgp
|
||||
print(mgp)
|
||||
if optimize:
|
||||
mgp.optimize(optimizer=optimizer, messages=messages)
|
||||
Y_test_pred = mgp.predict(X_test)
|
||||
score_folds[1, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[1, n] = np.mean(mgp.log_predictive_density(X_test, Y_test))
|
||||
print mgp
|
||||
print pred_density
|
||||
print(mgp)
|
||||
print(pred_density)
|
||||
|
||||
print "Gaussian Laplace GP"
|
||||
print("Gaussian Laplace GP")
|
||||
N, D = Y_train.shape
|
||||
g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
|
||||
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
|
||||
|
|
@ -186,18 +186,18 @@ def boston_example(optimize=True, plot=True):
|
|||
mg.constrain_fixed('.*white', 1e-5)
|
||||
mg['rbf_len'] = rbf_len
|
||||
mg['noise'] = noise
|
||||
print mg
|
||||
print(mg)
|
||||
if optimize:
|
||||
mg.optimize(optimizer=optimizer, messages=messages)
|
||||
Y_test_pred = mg.predict(X_test)
|
||||
score_folds[2, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[2, n] = np.mean(mg.log_predictive_density(X_test, Y_test))
|
||||
print pred_density
|
||||
print mg
|
||||
print(pred_density)
|
||||
print(mg)
|
||||
|
||||
for stu_num, df in enumerate(degrees_freedoms):
|
||||
#Student T
|
||||
print "Student-T GP {}df".format(df)
|
||||
print("Student-T GP {}df".format(df))
|
||||
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
|
||||
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
|
||||
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
|
||||
|
|
@ -205,14 +205,14 @@ def boston_example(optimize=True, plot=True):
|
|||
mstu_t.constrain_bounded('.*t_scale2', 0.0001, 1000)
|
||||
mstu_t['rbf_len'] = rbf_len
|
||||
mstu_t['.*t_scale2'] = noise
|
||||
print mstu_t
|
||||
print(mstu_t)
|
||||
if optimize:
|
||||
mstu_t.optimize(optimizer=optimizer, messages=messages)
|
||||
Y_test_pred = mstu_t.predict(X_test)
|
||||
score_folds[3+stu_num, n] = rmse(Y_test, Y_test_pred[0])
|
||||
pred_density[3+stu_num, n] = np.mean(mstu_t.log_predictive_density(X_test, Y_test))
|
||||
print pred_density
|
||||
print mstu_t
|
||||
print(pred_density)
|
||||
print(mstu_t)
|
||||
|
||||
if plot:
|
||||
plt.figure()
|
||||
|
|
@ -230,8 +230,8 @@ def boston_example(optimize=True, plot=True):
|
|||
plt.scatter(X_test[:, data_axis_plot], Y_test, c='r', marker='x')
|
||||
plt.title('Stu t {}df'.format(df))
|
||||
|
||||
print "Average scores: {}".format(np.mean(score_folds, 1))
|
||||
print "Average pred density: {}".format(np.mean(pred_density, 1))
|
||||
print("Average scores: {}".format(np.mean(score_folds, 1)))
|
||||
print("Average pred density: {}".format(np.mean(pred_density, 1)))
|
||||
|
||||
if plot:
|
||||
#Plotting
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ def olympic_marathon_men(optimize=True, plot=True):
|
|||
"""Run a standard Gaussian process regression on the Olympic marathon data."""
|
||||
try:import pods
|
||||
except ImportError:
|
||||
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
return
|
||||
data = pods.datasets.olympic_marathon_men()
|
||||
|
||||
|
|
@ -88,7 +88,7 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
|
|||
"""
|
||||
try:import pods
|
||||
except ImportError:
|
||||
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
return
|
||||
data = pods.datasets.epomeo_gpx()
|
||||
num_data_list = []
|
||||
|
|
@ -135,7 +135,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
|
|||
|
||||
try:import pods
|
||||
except ImportError:
|
||||
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
return
|
||||
data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
|
||||
# data['Y'] = data['Y'][0::2, :]
|
||||
|
|
@ -219,7 +219,7 @@ def olympic_100m_men(optimize=True, plot=True):
|
|||
"""Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
|
||||
try:import pods
|
||||
except ImportError:
|
||||
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
return
|
||||
data = pods.datasets.olympic_100m_men()
|
||||
|
||||
|
|
@ -240,7 +240,7 @@ def toy_rbf_1d(optimize=True, plot=True):
|
|||
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
|
||||
try:import pods
|
||||
except ImportError:
|
||||
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
return
|
||||
data = pods.datasets.toy_rbf_1d()
|
||||
|
||||
|
|
@ -258,7 +258,7 @@ def toy_rbf_1d_50(optimize=True, plot=True):
|
|||
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
|
||||
try:import pods
|
||||
except ImportError:
|
||||
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
return
|
||||
data = pods.datasets.toy_rbf_1d_50()
|
||||
|
||||
|
|
@ -377,7 +377,7 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
|
|||
"""Predict the location of a robot given wirelss signal strength readings."""
|
||||
try:import pods
|
||||
except ImportError:
|
||||
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
return
|
||||
data = pods.datasets.robot_wireless()
|
||||
|
||||
|
|
@ -398,14 +398,14 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
|
|||
|
||||
sse = ((data['Xtest'] - Xpredict)**2).sum()
|
||||
|
||||
print('Sum of squares error on test data: ' + str(sse))
|
||||
print(('Sum of squares error on test data: ' + str(sse)))
|
||||
return m
|
||||
|
||||
def silhouette(max_iters=100, optimize=True, plot=True):
|
||||
"""Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
|
||||
try:import pods
|
||||
except ImportError:
|
||||
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
|
||||
print('pods unavailable, see https://github.com/sods/ods for example datasets')
|
||||
return
|
||||
data = pods.datasets.silhouette()
|
||||
|
||||
|
|
@ -416,7 +416,7 @@ def silhouette(max_iters=100, optimize=True, plot=True):
|
|||
if optimize:
|
||||
m.optimize(messages=True, max_iters=max_iters)
|
||||
|
||||
print m
|
||||
print(m)
|
||||
return m
|
||||
|
||||
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False):
|
||||
|
|
@ -468,7 +468,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
|
|||
if plot:
|
||||
m.plot()
|
||||
|
||||
print m
|
||||
print(m)
|
||||
return m
|
||||
|
||||
def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
|
||||
|
|
@ -492,7 +492,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
|
|||
if plot:
|
||||
m.plot(ax=axes[0])
|
||||
axes[0].set_title('no input uncertainty')
|
||||
print m
|
||||
print(m)
|
||||
|
||||
# the same Model with uncertainty
|
||||
m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
|
||||
|
|
@ -503,5 +503,50 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
|
|||
axes[1].set_title('with input uncertainty')
|
||||
fig.canvas.draw()
|
||||
|
||||
print m
|
||||
print(m)
|
||||
return m
|
||||
|
||||
def simple_mean_function(max_iters=100, optimize=True, plot=True):
|
||||
"""
|
||||
The simplest possible mean function. No parameters, just a simple Sinusoid.
|
||||
"""
|
||||
#create simple mean function
|
||||
mf = GPy.core.Mapping(1,1)
|
||||
mf.f = np.sin
|
||||
mf.update_gradients = lambda a,b: None
|
||||
|
||||
X = np.linspace(0,10,50).reshape(-1,1)
|
||||
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape)
|
||||
|
||||
k =GPy.kern.RBF(1)
|
||||
lik = GPy.likelihoods.Gaussian()
|
||||
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
|
||||
if optimize:
|
||||
m.optimize(max_iters=max_iters)
|
||||
if plot:
|
||||
m.plot(plot_limits=(-10,15))
|
||||
return m
|
||||
|
||||
def parametric_mean_function(max_iters=100, optimize=True, plot=True):
|
||||
"""
|
||||
A linear mean function with parameters that we'll learn alongside the kernel
|
||||
"""
|
||||
#create simple mean function
|
||||
mf = GPy.core.Mapping(1,1)
|
||||
mf.f = np.sin
|
||||
|
||||
X = np.linspace(0,10,50).reshape(-1,1)
|
||||
Y = np.sin(X) + 0.5*np.cos(3*X) + 0.1*np.random.randn(*X.shape) + 3*X
|
||||
|
||||
mf = GPy.mappings.Linear(1,1)
|
||||
|
||||
k =GPy.kern.RBF(1)
|
||||
lik = GPy.likelihoods.Gaussian()
|
||||
m = GPy.core.GP(X, Y, kernel=k, likelihood=lik, mean_function=mf)
|
||||
if optimize:
|
||||
m.optimize(max_iters=max_iters)
|
||||
if plot:
|
||||
m.plot()
|
||||
return m
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,3 +1,3 @@
-import latent_function_inference
-import optimization
-import mcmc
+from . import latent_function_inference
+from . import optimization
+from . import mcmc
|
|||
|
|
@@ -61,15 +61,15 @@ class InferenceMethodList(LatentFunctionInference, list):
         for inf in state:
             self.append(inf)
 
-from exact_gaussian_inference import ExactGaussianInference
-from laplace import Laplace
+from .exact_gaussian_inference import ExactGaussianInference
+from .laplace import Laplace,LaplaceBlock
 from GPy.inference.latent_function_inference.var_dtc import VarDTC
-from expectation_propagation import EP
-from expectation_propagation_dtc import EPDTC
-from dtc import DTC
-from fitc import FITC
-from var_dtc_parallel import VarDTC_minibatch
-from svgp import SVGP
+from .expectation_propagation import EP
+from .expectation_propagation_dtc import EPDTC
+from .dtc import DTC
+from .fitc import FITC
+from .var_dtc_parallel import VarDTC_minibatch
+from .svgp import SVGP
 
 # class FullLatentFunctionData(object):
 #
|
|
|||
|
|
@@ -1,7 +1,7 @@
 # Copyright (c) 2012-2014, James Hensman
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from posterior import Posterior
+from .posterior import Posterior
 from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
 import numpy as np
 from . import LatentFunctionInference
@@ -20,7 +20,8 @@ class DTC(LatentFunctionInference):
     def __init__(self):
         self.const_jitter = 1e-6
 
-    def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
+    def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
+        assert mean_function is None, "inference with a mean function not implemented"
         assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
 
         num_inducing, _ = Z.shape
@ -29,7 +30,7 @@ class DTC(LatentFunctionInference):
|
|||
#make sure the noise is not hetero
|
||||
beta = 1./likelihood.gaussian_variance(Y_metadata)
|
||||
if beta.size > 1:
|
||||
raise NotImplementedError, "no hetero noise with this implementation of DTC"
|
||||
raise NotImplementedError("no hetero noise with this implementation of DTC")
|
||||
|
||||
Kmm = kern.K(Z)
|
||||
Knn = kern.Kdiag(X)
|
||||
|
|
@ -88,7 +89,8 @@ class vDTC(object):
|
|||
def __init__(self):
|
||||
self.const_jitter = 1e-6
|
||||
|
||||
def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
|
||||
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
|
||||
assert mean_function is None, "inference with a mean function not implemented"
|
||||
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
|
||||
|
||||
num_inducing, _ = Z.shape
|
||||
|
|
@ -97,7 +99,7 @@ class vDTC(object):
|
|||
#make sure the noise is not hetero
|
||||
beta = 1./likelihood.gaussian_variance(Y_metadata)
|
||||
if beta.size > 1:
|
||||
raise NotImplementedError, "no hetero noise with this implementation of DTC"
|
||||
raise NotImplementedError("no hetero noise with this implementation of DTC")
|
||||
|
||||
Kmm = kern.K(Z)
|
||||
Knn = kern.Kdiag(X)
|
||||
|
|
|
|||
|
|
@@ -1,7 +1,7 @@
 # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
-from posterior import Posterior
+from .posterior import Posterior
 from ...util.linalg import pdinv, dpotrs, tdot
 from ...util import diag
 import numpy as np
@@ -36,16 +36,23 @@ class ExactGaussianInference(LatentFunctionInference):
             #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
             return Y
 
-    def inference(self, kern, X, likelihood, Y, Y_metadata=None):
+    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
         """
         Returns a Posterior class containing essential quantities of the posterior
         """
-        YYT_factor = self.get_YYTfactor(Y)
+
+        if mean_function is None:
+            m = 0
+        else:
+            m = mean_function.f(X)
+
+
+        YYT_factor = self.get_YYTfactor(Y-m)
 
         K = kern.K(X)
 
         Ky = K.copy()
-        diag.add(Ky, likelihood.gaussian_variance(Y_metadata))
+        diag.add(Ky, likelihood.gaussian_variance(Y_metadata)+1e-8)
         Wi, LW, LWi, W_logdet = pdinv(Ky)
 
         alpha, _ = dpotrs(LW, YYT_factor, lower=1)
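The hunk above folds an optional mean function into exact inference by subtracting it from the targets before the usual Gaussian algebra. A minimal standalone sketch of that idea (standard GP identities, not the GPy internals; the mean callable is a hypothetical stand-in):

import numpy as np

def exact_gp_with_mean(K, Y, noise_var, X=None, mean=None):
    # subtract the prior mean, then run standard exact inference on the residual
    m = mean(X) if mean is not None else 0.
    Ky = K + (noise_var + 1e-8) * np.eye(K.shape[0])          # same small jitter as in the diff
    L = np.linalg.cholesky(Ky)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, Y - m))   # the 'woodbury vector'
    log_marginal = (-0.5 * np.sum((Y - m) * alpha)
                    - Y.shape[1] * np.sum(np.log(np.diag(L)))
                    - 0.5 * Y.shape[0] * Y.shape[1] * np.log(2 * np.pi))
    return alpha, log_marginal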
@@ -56,4 +63,18 @@ class ExactGaussianInference(LatentFunctionInference):
 
         dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
 
-        return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
+        return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
+
+    def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None):
+        """
+        Leave one out error as found in
+        "Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
+        Vehtari et al. 2014.
+        """
+        g = posterior.woodbury_vector
+        c = posterior.woodbury_inv
+        c_diag = np.diag(c)[:, None]
+        neg_log_marginal_LOO = 0.5*np.log(2*np.pi) - 0.5*np.log(c_diag) + 0.5*(g**2)/c_diag
+        # believe from "Predictive Approaches for Choosing Hyperparameters in Gaussian Processes"
+        # this is the negative marginal LOO
+        return -neg_log_marginal_LOO
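The LOO term above needs only the Woodbury vector g = K_y^{-1}(y - m) and the diagonal of the Woodbury inverse; a short restatement of how those give per-point leave-one-out means, variances and log densities (a sketch of the same formula, not GPy API):

import numpy as np

def loo_log_density(woodbury_vector, woodbury_inv, Y):
    g = woodbury_vector                     # K_y^{-1} (y - m)
    c = np.diag(woodbury_inv)[:, None]      # diagonal of K_y^{-1}
    mu_loo = Y - g / c                      # leave-one-out predictive mean
    var_loo = 1. / c                        # leave-one-out predictive variance
    return -0.5 * np.log(2 * np.pi * var_loo) - 0.5 * (Y - mu_loo)**2 / var_loo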
|
|
|
|||
|
|
@@ -2,7 +2,7 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
 from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs
-from posterior import Posterior
+from .posterior import Posterior
 from . import LatentFunctionInference
 log_2_pi = np.log(2*np.pi)
 
@@ -33,15 +33,19 @@ class EP(LatentFunctionInference):
         # TODO: update approximation in the end as well? Maybe even with a switch?
         pass
 
-    def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
+    def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None):
+        assert mean_function is None, "inference with a mean function not implemented"
         num_data, output_dim = Y.shape
         assert output_dim ==1, "ep in 1D only (for now!)"
 
         K = kern.K(X)
 
         if self._ep_approximation is None:
 
             #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
             mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
         else:
             #if we've already run EP, just use the existing approximation stored in self._ep_approximation
             mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
 
         Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))
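For orientation, the site parameters (mu_tilde, tau_tilde) returned by expectation_propagation define a Gaussian approximation whose moments follow from standard identities; a hedged sketch of that textbook EP algebra (not the code path used above):

import numpy as np

def ep_posterior_from_sites(K, mu_tilde, tau_tilde):
    # Sigma = (K^{-1} + diag(tau))^{-1}, written via the Woodbury identity
    Sigma = K - K.dot(np.linalg.solve(K + np.diag(1. / tau_tilde), K))
    mu = Sigma.dot(tau_tilde * mu_tilde)    # mu = Sigma diag(tau) mu_tilde
    return mu, Sigma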
|
|
|||
|
|
@ -6,7 +6,7 @@ from ...util import diag
|
|||
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
from . import LatentFunctionInference
|
||||
from posterior import Posterior
|
||||
from .posterior import Posterior
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class EPDTC(LatentFunctionInference):
|
||||
|
|
@ -64,7 +64,8 @@ class EPDTC(LatentFunctionInference):
|
|||
self.old_mutilde, self.old_vtilde = None, None
|
||||
self._ep_approximation = None
|
||||
|
||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
|
||||
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
|
||||
assert mean_function is None, "inference with a mean function not implemented"
|
||||
num_data, output_dim = Y.shape
|
||||
assert output_dim ==1, "ep in 1D only (for now!)"
|
||||
|
||||
|
|
@ -179,7 +180,7 @@ class EPDTC(LatentFunctionInference):
|
|||
if VVT_factor.shape[1] == Y.shape[1]:
|
||||
woodbury_vector = Cpsi1Vf # == Cpsi1V
|
||||
else:
|
||||
print 'foobar'
|
||||
print('foobar')
|
||||
psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T
|
||||
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
|
||||
tmp, _ = dpotrs(LB, tmp, lower=1)
|
||||
|
|
@ -314,7 +315,7 @@ def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf,
|
|||
dL_dR = None
|
||||
elif het_noise:
|
||||
if uncertain_inputs:
|
||||
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
|
||||
raise NotImplementedError("heteroscedatic derivates with uncertain inputs not implemented")
|
||||
else:
|
||||
#from ...util.linalg import chol_inv
|
||||
#LBi = chol_inv(LB)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Copyright (c) 2012, James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from posterior import Posterior
|
||||
from .posterior import Posterior
|
||||
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
|
||||
from ...util import diag
|
||||
import numpy as np
|
||||
|
|
@ -18,7 +18,8 @@ class FITC(LatentFunctionInference):
|
|||
"""
|
||||
const_jitter = 1e-6
|
||||
|
||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
|
||||
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
|
||||
assert mean_function is None, "inference with a mean function not implemented"
|
||||
|
||||
num_inducing, _ = Z.shape
|
||||
num_data, output_dim = Y.shape
|
||||
|
|
@ -26,7 +27,7 @@ class FITC(LatentFunctionInference):
|
|||
#make sure the noise is not hetero
|
||||
sigma_n = likelihood.gaussian_variance(Y_metadata)
|
||||
if sigma_n.size >1:
|
||||
raise NotImplementedError, "no hetero noise with this implementation of FITC"
|
||||
raise NotImplementedError("no hetero noise with this implementation of FITC")
|
||||
|
||||
Kmm = kern.K(Z)
|
||||
Knn = kern.Kdiag(X)
|
||||
|
|
|
|||
|
|
@ -12,13 +12,14 @@
|
|||
|
||||
import numpy as np
|
||||
from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv
|
||||
from posterior import Posterior
|
||||
from .posterior import Posterior
|
||||
import warnings
|
||||
def warning_on_one_line(message, category, filename, lineno, file=None, line=None):
|
||||
return ' %s:%s: %s:%s\n' % (filename, lineno, category.__name__, message)
|
||||
warnings.formatwarning = warning_on_one_line
|
||||
from scipy import optimize
|
||||
from . import LatentFunctionInference
|
||||
from scipy.integrate import quad
|
||||
|
||||
class Laplace(LatentFunctionInference):
|
||||
|
||||
|
|
@ -39,10 +40,90 @@ class Laplace(LatentFunctionInference):
|
|||
self.first_run = True
|
||||
self._previous_Ki_fhat = None
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
|
||||
def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None, f_hat=None, W=None, Ki_W_i=None):
|
||||
"""
|
||||
Leave one out log predictive density as found in
|
||||
"Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
|
||||
Vehtari et al. 2014.
|
||||
"""
|
||||
Ki_f_init = np.zeros_like(Y)
|
||||
|
||||
if K is None:
|
||||
K = kern.K(X)
|
||||
|
||||
if f_hat is None:
|
||||
f_hat, _ = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
|
||||
|
||||
if W is None:
|
||||
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
|
||||
|
||||
if Ki_W_i is None:
|
||||
_, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
|
||||
logpdf_dfhat = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
|
||||
|
||||
if W.shape[1] == 1:
|
||||
W = np.diagflat(W)
|
||||
|
||||
#Eq 14, and 16
|
||||
var_site = 1./np.diag(W)[:, None]
|
||||
mu_site = f_hat + var_site*logpdf_dfhat
|
||||
prec_site = 1./var_site
|
||||
#Eq 19
|
||||
marginal_cov = Ki_W_i
|
||||
marginal_mu = marginal_cov.dot(np.diagflat(prec_site)).dot(mu_site)
|
||||
marginal_var = np.diag(marginal_cov)[:, None]
|
||||
#Eq 30 with using site parameters instead of Gaussian site parameters
|
||||
#(var_site instead of sigma^{2} )
|
||||
posterior_cav_var = 1./(1./marginal_var - 1./var_site)
|
||||
posterior_cav_mean = posterior_cav_var*((1./marginal_var)*marginal_mu - (1./var_site)*Y)
|
||||
|
||||
flat_y = Y.flatten()
|
||||
flat_mu = posterior_cav_mean.flatten()
|
||||
flat_var = posterior_cav_var.flatten()
|
||||
|
||||
if Y_metadata is not None:
|
||||
#Need to zip individual elements of Y_metadata aswell
|
||||
Y_metadata_flat = {}
|
||||
if Y_metadata is not None:
|
||||
for key, val in Y_metadata.items():
|
||||
Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1, 1)
|
||||
|
||||
zipped_values = []
|
||||
|
||||
for i in range(Y.shape[0]):
|
||||
y_m = {}
|
||||
for key, val in Y_metadata_flat.items():
|
||||
if np.isscalar(val) or val.shape[0] == 1:
|
||||
y_m[key] = val
|
||||
else:
|
||||
#Won't broadcast yet
|
||||
y_m[key] = val[i]
|
||||
zipped_values.append((flat_y[i], flat_mu[i], flat_var[i], y_m))
|
||||
else:
|
||||
#Otherwise just pass along None's
|
||||
zipped_values = zip(flat_y, flat_mu, flat_var, [None]*Y.shape[0])
|
||||
|
||||
def integral_generator(yi, mi, vi, yi_m):
|
||||
def f(fi_star):
|
||||
#More stable in the log space
|
||||
p_fi = np.exp(likelihood.logpdf(fi_star, yi, yi_m)
|
||||
- 0.5*np.log(2*np.pi*vi)
|
||||
- 0.5*np.square(mi-fi_star)/vi)
|
||||
return p_fi
|
||||
return f
|
||||
|
||||
#Eq 30
|
||||
p_ystar, _ = zip(*[quad(integral_generator(y, m, v, yi_m), -np.inf, np.inf)
|
||||
for y, m, v, yi_m in zipped_values])
|
||||
p_ystar = np.array(p_ystar).reshape(-1, 1)
|
||||
return np.log(p_ystar)
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
|
||||
"""
|
||||
Returns a Posterior class containing essential quantities of the posterior
|
||||
"""
|
||||
assert mean_function is None, "inference with a mean function not implemented"
|
||||
|
||||
# Compute K
|
||||
K = kern.K(X)
|
||||
|
|
@ -50,21 +131,25 @@ class Laplace(LatentFunctionInference):
|
|||
#Find mode
|
||||
if self.bad_fhat or self.first_run:
|
||||
Ki_f_init = np.zeros_like(Y)
|
||||
first_run = False
|
||||
self.first_run = False
|
||||
else:
|
||||
Ki_f_init = self._previous_Ki_fhat
|
||||
|
||||
Ki_f_init = np.zeros_like(Y)# FIXME: take this out
|
||||
|
||||
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
|
||||
|
||||
self.f_hat = f_hat
|
||||
self.Ki_fhat = Ki_fhat
|
||||
self.K = K.copy()
|
||||
#self.Ki_fhat = Ki_fhat
|
||||
#self.K = K.copy()
|
||||
|
||||
#Compute hessian and other variables at mode
|
||||
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
|
||||
|
||||
self._previous_Ki_fhat = Ki_fhat.copy()
|
||||
return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
|
||||
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
|
||||
"""
|
||||
Rasmussen's numerically stable mode finding
|
||||
For nomenclature see Rasmussen & Williams 2006
|
||||
|
|
@ -89,7 +174,12 @@ class Laplace(LatentFunctionInference):
|
|||
|
||||
#define the objective function (to be maximised)
|
||||
def obj(Ki_f, f):
|
||||
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
|
||||
ll = -0.5*np.sum(np.dot(Ki_f.T, f)) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
|
||||
if np.isnan(ll):
|
||||
return -np.inf
|
||||
else:
|
||||
return ll
|
||||
|
||||
|
||||
difference = np.inf
|
||||
iteration = 0
|
||||
|
|
@ -104,7 +194,7 @@ class Laplace(LatentFunctionInference):
|
|||
W_f = W*f
|
||||
|
||||
b = W_f + grad # R+W p46 line 6.
|
||||
W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
W12BiW12, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
|
||||
W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))
|
||||
|
||||
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
|
||||
|
|
@ -121,7 +211,9 @@ class Laplace(LatentFunctionInference):
|
|||
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
|
||||
Ki_f_new = Ki_f + step*dKi_f
|
||||
f_new = np.dot(K, Ki_f_new)
|
||||
|
||||
#print "new {} vs old {}".format(obj(Ki_f_new, f_new), obj(Ki_f, f))
|
||||
if obj(Ki_f_new, f_new) < obj(Ki_f, f):
|
||||
raise ValueError("Shouldn't happen, brent optimization failing")
|
||||
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
|
||||
Ki_f = Ki_f_new
|
||||
f = f_new
|
||||
|
|
@ -152,14 +244,10 @@ class Laplace(LatentFunctionInference):
|
|||
if np.any(np.isnan(W)):
|
||||
raise ValueError('One or more element(s) of W is NaN')
|
||||
|
||||
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
|
||||
#compute vital matrices
|
||||
C = np.dot(LiW12, K)
|
||||
Ki_W_i = K - C.T.dot(C)
|
||||
K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
|
||||
#compute the log marginal
|
||||
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - np.sum(np.log(np.diag(L)))
|
||||
log_marginal = -0.5*np.sum(np.dot(Ki_f.T, f_hat)) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*logdet_I_KW
|
||||
|
||||
# Compute matrices for derivatives
|
||||
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
|
||||
|
|
@ -196,23 +284,23 @@ class Laplace(LatentFunctionInference):
|
|||
dL_dthetaL = np.zeros(num_params)
|
||||
for thetaL_i in range(num_params):
|
||||
#Explicit
|
||||
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
|
||||
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i,:, :])
|
||||
# The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
|
||||
+ 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
|
||||
+ 0.5*np.sum(np.diag(Ki_W_i)*np.squeeze(dlik_hess_dthetaL[thetaL_i, :, :]))
|
||||
)
|
||||
|
||||
#Implicit
|
||||
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
|
||||
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
|
||||
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[thetaL_i, :, :])
|
||||
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[thetaL_i, :, :])
|
||||
dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
|
||||
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
|
||||
dL_dthetaL[thetaL_i] = np.sum(dL_dthetaL_exp + dL_dthetaL_imp)
|
||||
|
||||
else:
|
||||
dL_dthetaL = np.zeros(likelihood.size)
|
||||
|
||||
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
|
||||
|
||||
def _compute_B_statistics(self, K, W, log_concave):
|
||||
def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
|
||||
"""
|
||||
Rasmussen suggests the use of a numerically stable positive definite matrix B
|
||||
Which has a positive diagonal elements and can be easily inverted
|
||||
|
|
@ -225,7 +313,7 @@ class Laplace(LatentFunctionInference):
|
|||
"""
|
||||
if not log_concave:
|
||||
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
|
||||
W[W<1e-6] = 1e-6
|
||||
W = np.clip(W, 1e-6, 1e+30)
|
||||
# NOTE: when setting a parameter inside parameters_changed it will allways come to closed update circles!!!
|
||||
#W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances which can occur
|
||||
# If the likelihood is non-log-concave. We wan't to say that there is a negative variance
|
||||
|
|
@ -247,5 +335,160 @@ class Laplace(LatentFunctionInference):
|
|||
#K_Wi_i_2 , _= dpotri(L2)
|
||||
#symmetrify(K_Wi_i_2)
|
||||
|
||||
return K_Wi_i, L, LiW12
|
||||
#compute vital matrices
|
||||
C = np.dot(LiW12, K)
|
||||
Ki_W_i = K - C.T.dot(C)
|
||||
|
||||
I_KW_i = np.eye(K.shape[0]) - np.dot(K, K_Wi_i)
|
||||
logdet_I_KW = 2*np.sum(np.log(np.diag(L)))
|
||||
|
||||
return K_Wi_i, logdet_I_KW, I_KW_i, Ki_W_i
|
||||
|
||||
class LaplaceBlock(Laplace):
|
||||
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None, *args, **kwargs):
|
||||
Ki_f = Ki_f_init.copy()
|
||||
f = np.dot(K, Ki_f)
|
||||
|
||||
#define the objective function (to be maximised)
|
||||
def obj(Ki_f, f):
|
||||
ll = -0.5*np.dot(Ki_f.T, f) + np.sum(likelihood.logpdf_sum(f, Y, Y_metadata=Y_metadata))
|
||||
if np.isnan(ll):
|
||||
return -np.inf
|
||||
else:
|
||||
return ll
|
||||
|
||||
difference = np.inf
|
||||
iteration = 0
|
||||
|
||||
I = np.eye(K.shape[0])
|
||||
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
|
||||
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
|
||||
|
||||
W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
|
||||
|
||||
W_f = np.dot(W, f)
|
||||
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
|
||||
|
||||
b = W_f + grad # R+W p46 line 6.
|
||||
K_Wi_i, _, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave, *args, **kwargs)
|
||||
|
||||
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
|
||||
#a = (I - (K+Wi)i*K)*b
|
||||
full_step_Ki_f = np.dot(I - np.dot(K_Wi_i, K), b)
|
||||
dKi_f = full_step_Ki_f - Ki_f
|
||||
|
||||
#define an objective for the line search (minimize this one)
|
||||
def inner_obj(step_size):
|
||||
Ki_f_trial = Ki_f + step_size*dKi_f
|
||||
f_trial = np.dot(K, Ki_f_trial)
|
||||
return -obj(Ki_f_trial, f_trial)
|
||||
|
||||
#use scipy for the line search, the compute new values of f, Ki_f
|
||||
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
|
||||
|
||||
Ki_f_new = Ki_f + step*dKi_f
|
||||
f_new = np.dot(K, Ki_f_new)
|
||||
|
||||
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
|
||||
Ki_f = Ki_f_new
|
||||
f = f_new
|
||||
iteration += 1
|
||||
|
||||
#Warn of bad fits
|
||||
if difference > self._mode_finding_tolerance:
|
||||
if not self.bad_fhat:
|
||||
warnings.warn("Not perfect f_hat fit difference: {}".format(difference))
|
||||
self._previous_Ki_fhat = np.zeros_like(Y)
|
||||
self.bad_fhat = True
|
||||
elif self.bad_fhat:
|
||||
self.bad_fhat = False
|
||||
warnings.warn("f_hat now fine again")
|
||||
if iteration > self._mode_finding_max_iter:
|
||||
warnings.warn("didn't find the best")
|
||||
|
||||
return f, Ki_f
|
||||
|
||||
def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
|
||||
#At this point get the hessian matrix (or vector as W is diagonal)
|
||||
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
|
||||
|
||||
W[np.diag_indices_from(W)] = np.clip(np.diag(W), 1e-6, 1e+30)
|
||||
|
||||
K_Wi_i, log_B_det, I_KW_i, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
|
||||
|
||||
#compute the log marginal
|
||||
#FIXME: The derterminant should be output_dim*0.5 I think, gradients may now no longer check
|
||||
log_marginal = -0.5*np.dot(f_hat.T, Ki_f) + np.sum(likelihood.logpdf_sum(f_hat, Y, Y_metadata=Y_metadata)) - 0.5*log_B_det
|
||||
|
||||
#Compute vival matrices for derivatives
|
||||
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
|
||||
|
||||
#dL_dfhat = np.zeros((f_hat.shape[0]))
|
||||
#for i in range(f_hat.shape[0]):
|
||||
#dL_dfhat[i] = -0.5*np.trace(np.dot(Ki_W_i, dW_df[:,:,i]))
|
||||
|
||||
dL_dfhat = -0.5*np.einsum('ij,ijk->k', Ki_W_i, dW_df)
|
||||
|
||||
woodbury_vector = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
|
||||
|
||||
####################
|
||||
#compute dL_dK#
|
||||
####################
|
||||
if kern.size > 0 and not kern.is_fixed:
|
||||
#Explicit
|
||||
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
|
||||
|
||||
#Implicit
|
||||
implicit_part = woodbury_vector.dot(dL_dfhat[None,:]).dot(I_KW_i)
|
||||
#implicit_part = Ki_f.dot(dL_dfhat[None,:]).dot(I_KW_i)
|
||||
|
||||
dL_dK = explicit_part + implicit_part
|
||||
else:
|
||||
dL_dK = np.zeros_like(K)
|
||||
|
||||
####################
|
||||
#compute dL_dthetaL#
|
||||
####################
|
||||
if likelihood.size > 0 and not likelihood.is_fixed:
|
||||
raise NotImplementedError
|
||||
else:
|
||||
dL_dthetaL = np.zeros(likelihood.size)
|
||||
|
||||
#self.K_Wi_i = K_Wi_i
|
||||
#self.Ki_W_i = Ki_W_i
|
||||
#self.W = W
|
||||
#self.K = K
|
||||
#self.dL_dfhat = dL_dfhat
|
||||
#self.explicit_part = explicit_part
|
||||
#self.implicit_part = implicit_part
|
||||
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
|
||||
|
||||
def _compute_B_statistics(self, K, W, log_concave, *args, **kwargs):
|
||||
"""
|
||||
Rasmussen suggests the use of a numerically stable positive definite matrix B
|
||||
Which has a positive diagonal element and can be easyily inverted
|
||||
|
||||
:param K: Prior Covariance matrix evaluated at locations X
|
||||
:type K: NxN matrix
|
||||
:param W: Negative hessian at a point (diagonal matrix)
|
||||
:type W: Vector of diagonal values of hessian (1xN)
|
||||
:returns: (K_Wi_i, L_B, not_provided)
|
||||
"""
|
||||
#w = GPy.util.diag.view(W)
|
||||
#W[:] = np.where(w<1e-6, 1e-6, w)
|
||||
|
||||
#B = I + KW
|
||||
B = np.eye(K.shape[0]) + np.dot(K, W)
|
||||
#Bi, L, Li, logdetB = pdinv(B)
|
||||
Bi = np.linalg.inv(B)
|
||||
|
||||
#K_Wi_i = np.eye(K.shape[0]) - mdot(W, Bi, K)
|
||||
K_Wi_i = np.dot(W, Bi)
|
||||
|
||||
#self.K_Wi_i_brute = np.linalg.inv(K + np.linalg.inv(W))
|
||||
#self.B = B
|
||||
#self.Bi = Bi
|
||||
Ki_W_i = np.dot(Bi, K)
|
||||
|
||||
sign, logdetB = np.linalg.slogdet(B)
|
||||
return K_Wi_i, sign*logdetB, Bi, Ki_W_i
|
||||
|
|
|
|||
|
|
@@ -15,7 +15,7 @@ class Posterior(object):
     the function at any new point x_* by integrating over this posterior.
 
     """
-    def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None):
+    def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None, prior_mean=0):
         """
         woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
         woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
@@ -52,7 +52,7 @@ class Posterior(object):
             or ((mean is not None) and (cov is not None)):
             pass # we have sufficient to compute the posterior
         else:
-            raise ValueError, "insufficient information to compute the posterior"
+            raise ValueError("insufficient information to compute the posterior")
 
         self._K_chol = K_chol
         self._K = K
|
@ -67,6 +67,7 @@ class Posterior(object):
|
|||
#option 2:
|
||||
self._mean = mean
|
||||
self._covariance = cov
|
||||
self._prior_mean = prior_mean
|
||||
|
||||
#compute this lazily
|
||||
self._precision = None
|
||||
|
|
@ -107,7 +108,7 @@ class Posterior(object):
|
|||
if self._precision is None:
|
||||
cov = np.atleast_3d(self.covariance)
|
||||
self._precision = np.zeros(cov.shape) # if one covariance per dimension
|
||||
for p in xrange(cov.shape[-1]):
|
||||
for p in range(cov.shape[-1]):
|
||||
self._precision[:,:,p] = pdinv(cov[:,:,p])[0]
|
||||
return self._precision
|
||||
|
||||
|
|
@ -125,7 +126,7 @@ class Posterior(object):
|
|||
if self._woodbury_inv is not None:
|
||||
winv = np.atleast_3d(self._woodbury_inv)
|
||||
self._woodbury_chol = np.zeros(winv.shape)
|
||||
for p in xrange(winv.shape[-1]):
|
||||
for p in range(winv.shape[-1]):
|
||||
self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2]
|
||||
#Li = jitchol(self._woodbury_inv)
|
||||
#self._woodbury_chol, _ = dtrtri(Li)
|
||||
|
|
@ -134,13 +135,13 @@ class Posterior(object):
|
|||
#self._woodbury_chol = jitchol(W)
|
||||
#try computing woodbury chol from cov
|
||||
elif self._covariance is not None:
|
||||
raise NotImplementedError, "TODO: check code here"
|
||||
raise NotImplementedError("TODO: check code here")
|
||||
B = self._K - self._covariance
|
||||
tmp, _ = dpotrs(self.K_chol, B)
|
||||
self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
|
||||
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
|
||||
else:
|
||||
raise ValueError, "insufficient information to compute posterior"
|
||||
raise ValueError("insufficient information to compute posterior")
|
||||
return self._woodbury_chol
|
||||
|
||||
@property
|
||||
|
|
@ -160,7 +161,7 @@ class Posterior(object):
|
|||
elif self._covariance is not None:
|
||||
B = np.atleast_3d(self._K) - np.atleast_3d(self._covariance)
|
||||
self._woodbury_inv = np.empty_like(B)
|
||||
for i in xrange(B.shape[-1]):
|
||||
for i in range(B.shape[-1]):
|
||||
tmp, _ = dpotrs(self.K_chol, B[:,:,i])
|
||||
self._woodbury_inv[:,:,i], _ = dpotrs(self.K_chol, tmp.T)
|
||||
return self._woodbury_inv
|
||||
|
|
@@ -175,7 +176,7 @@ class Posterior(object):
         $$
         """
         if self._woodbury_vector is None:
-            self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
+            self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean - self._prior_mean)
         return self._woodbury_vector
 
     @property
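The subtraction of self._prior_mean above keeps the stored quantities consistent: with a prior mean the posterior mean equals prior_mean + K v, so the Woodbury vector v must be recovered from (mean - prior_mean). A one-line sketch of that relationship in plain NumPy (not the cached GPy code path):

import numpy as np

def woodbury_vector_from_mean(K, mean, prior_mean=0.):
    # v solves  mean = prior_mean + K v
    return np.linalg.solve(K, mean - prior_mean)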
|
|
|
|||
|
|
@ -2,17 +2,22 @@ from . import LatentFunctionInference
|
|||
from ...util import linalg
|
||||
from ...util import choleskies
|
||||
import numpy as np
|
||||
from posterior import Posterior
|
||||
from .posterior import Posterior
|
||||
|
||||
class SVGP(LatentFunctionInference):
|
||||
|
||||
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
|
||||
num_inducing = Z.shape[0]
|
||||
num_data, num_outputs = Y.shape
|
||||
def inference(self, q_u_mean, q_u_chol, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, KL_scale=1.0, batch_scale=1.0):
|
||||
|
||||
num_data, _ = Y.shape
|
||||
num_inducing, num_outputs = q_u_mean.shape
|
||||
|
||||
#expand cholesky representation
|
||||
L = choleskies.flat_to_triang(q_u_chol)
|
||||
S = np.einsum('ijk,ljk->ilk', L, L) #L.dot(L.T)
|
||||
|
||||
|
||||
S = np.empty((num_outputs, num_inducing, num_inducing))
|
||||
[np.dot(L[:,:,i], L[:,:,i].T, S[i,:,:]) for i in range(num_outputs)]
|
||||
S = S.swapaxes(0,2)
|
||||
#Si,_ = linalg.dpotri(np.asfortranarray(L), lower=1)
|
||||
Si = choleskies.multiple_dpotri(L)
|
||||
logdetS = np.array([2.*np.sum(np.log(np.abs(np.diag(L[:,:,i])))) for i in range(L.shape[-1])])
|
||||
|
|
@ -22,6 +27,15 @@ class SVGP(LatentFunctionInference):
|
|||
#S = S + np.eye(S.shape[0])*1e-5*np.max(np.max(S))
|
||||
#Si, Lnew, _,_ = linalg.pdinv(S)
|
||||
|
||||
#compute mean function stuff
|
||||
if mean_function is not None:
|
||||
prior_mean_u = mean_function.f(Z)
|
||||
prior_mean_f = mean_function.f(X)
|
||||
else:
|
||||
prior_mean_u = np.zeros((num_inducing, num_outputs))
|
||||
prior_mean_f = np.zeros((num_data, num_outputs))
|
||||
|
||||
|
||||
#compute kernel related stuff
|
||||
Kmm = kern.K(Z)
|
||||
Knm = kern.K(X, Z)
|
||||
|
|
@ -30,38 +44,64 @@ class SVGP(LatentFunctionInference):
|
|||
|
||||
#compute the marginal means and variances of q(f)
|
||||
A = np.dot(Knm, Kmmi)
|
||||
mu = np.dot(A, q_u_mean)
|
||||
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jkl->ikl', A, S),1)
|
||||
mu = prior_mean_f + np.dot(A, q_u_mean - prior_mean_u)
|
||||
#v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * np.einsum('ij,jlk->ilk', A, S),1)
|
||||
v = Knn_diag[:,None] - np.sum(A*Knm,1)[:,None] + np.sum(A[:,:,None] * linalg.ij_jlk_to_ilk(A, S),1)
|
||||
|
||||
#compute the KL term
|
||||
Kmmim = np.dot(Kmmi, q_u_mean)
|
||||
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.einsum('ij,ijk->k', Kmmi, S) + 0.5*np.sum(q_u_mean*Kmmim,0)
|
||||
KLs = -0.5*logdetS -0.5*num_inducing + 0.5*logdetKmm + 0.5*np.sum(Kmmi[:,:,None]*S,0).sum(0) + 0.5*np.sum(q_u_mean*Kmmim,0)
|
||||
KL = KLs.sum()
|
||||
dKL_dm = Kmmim
|
||||
#gradient of the KL term (assuming zero mean function)
|
||||
dKL_dm = Kmmim.copy()
|
||||
dKL_dS = 0.5*(Kmmi[:,:,None] - Si)
|
||||
dKL_dKmm = 0.5*num_outputs*Kmmi - 0.5*Kmmi.dot(S.sum(-1)).dot(Kmmi) - 0.5*Kmmim.dot(Kmmim.T)
|
||||
|
||||
if mean_function is not None:
|
||||
#adjust KL term for mean function
|
||||
Kmmi_mfZ = np.dot(Kmmi, prior_mean_u)
|
||||
KL += -np.sum(q_u_mean*Kmmi_mfZ)
|
||||
KL += 0.5*np.sum(Kmmi_mfZ*prior_mean_u)
|
||||
|
||||
#adjust gradient for mean fucntion
|
||||
dKL_dm -= Kmmi_mfZ
|
||||
dKL_dKmm += Kmmim.dot(Kmmi_mfZ.T)
|
||||
dKL_dKmm -= 0.5*Kmmi_mfZ.dot(Kmmi_mfZ.T)
|
||||
|
||||
#compute gradients for mean_function
|
||||
dKL_dmfZ = Kmmi_mfZ - Kmmim
|
||||
|
||||
#quadrature for the likelihood
|
||||
F, dF_dmu, dF_dv, dF_dthetaL = likelihood.variational_expectations(Y, mu, v, Y_metadata=Y_metadata)
|
||||
|
||||
#rescale the F term if working on a batch
|
||||
F, dF_dmu, dF_dv = F*batch_scale, dF_dmu*batch_scale, dF_dv*batch_scale
|
||||
if dF_dthetaL is not None:
|
||||
dF_dthetaL = dF_dthetaL.sum(1).sum(1)*batch_scale
|
||||
|
||||
#derivatives of expected likelihood
|
||||
#derivatives of expected likelihood, assuming zero mean function
|
||||
Adv = A.T[:,:,None]*dF_dv[None,:,:] # As if dF_Dv is diagonal
|
||||
Admu = A.T.dot(dF_dmu)
|
||||
#AdvA = np.einsum('ijk,jl->ilk', Adv, A)
|
||||
#AdvA = np.dot(A.T, Adv).swapaxes(0,1)
|
||||
AdvA = np.dstack([np.dot(A.T, Adv[:,:,i].T) for i in range(num_outputs)])
|
||||
tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
|
||||
#tmp = np.einsum('ijk,jlk->il', AdvA, S).dot(Kmmi)
|
||||
tmp = linalg.ijk_jlk_to_il(AdvA, S).dot(Kmmi)
|
||||
dF_dKmm = -Admu.dot(Kmmim.T) + AdvA.sum(-1) - tmp - tmp.T
|
||||
dF_dKmm = 0.5*(dF_dKmm + dF_dKmm.T) # necessary? GPy bug?
|
||||
tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
|
||||
dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
|
||||
#tmp = 2.*(np.einsum('ij,jlk->ilk', Kmmi,S) - np.eye(num_inducing)[:,:,None])
|
||||
tmp = 2.*(linalg.ij_jlk_to_ilk(Kmmi, S) - np.eye(num_inducing)[:,:,None])
|
||||
#dF_dKmn = np.einsum('ijk,jlk->il', tmp, Adv) + Kmmim.dot(dF_dmu.T)
|
||||
dF_dKmn = linalg.ijk_jlk_to_il(tmp, Adv) + Kmmim.dot(dF_dmu.T)
|
||||
dF_dm = Admu
|
||||
dF_dS = AdvA
|
||||
|
||||
#adjust gradient to account for mean function
|
||||
if mean_function is not None:
|
||||
dF_dmfX = dF_dmu.copy()
|
||||
dF_dmfZ = -Admu
|
||||
dF_dKmn -= np.dot(Kmmi_mfZ, dF_dmu.T)
|
||||
dF_dKmm += Admu.dot(Kmmi_mfZ.T)
|
||||
|
||||
|
||||
#sum (gradients of) expected likelihood and KL part
|
||||
log_marginal = F.sum() - KL
|
||||
dL_dm, dL_dS, dL_dKmm, dL_dKmn = dF_dm - dKL_dm, dF_dS- dKL_dS, dF_dKmm- dKL_dKmm, dF_dKmn
|
||||
|
|
@@ -69,4 +109,8 @@ class SVGP(LatentFunctionInference):
         dL_dchol = np.dstack([2.*np.dot(dL_dS[:,:,i], L[:,:,i]) for i in range(num_outputs)])
         dL_dchol = choleskies.triang_to_flat(dL_dchol)
 
-        return Posterior(mean=q_u_mean, cov=S, K=Kmm), log_marginal, {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv, 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
+        grad_dict = {'dL_dKmm':dL_dKmm, 'dL_dKmn':dL_dKmn, 'dL_dKdiag': dF_dv.sum(1), 'dL_dm':dL_dm, 'dL_dchol':dL_dchol, 'dL_dthetaL':dF_dthetaL}
+        if mean_function is not None:
+            grad_dict['dL_dmfZ'] = dF_dmfZ - dKL_dmfZ
+            grad_dict['dL_dmfX'] = dF_dmfX
+        return Posterior(mean=q_u_mean, cov=S, K=Kmm, prior_mean=prior_mean_u), log_marginal, grad_dict
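With a prior mean function, the SVGP predictive mean used in this file becomes m(X) + Knm Kmm^{-1} (q_u_mean - m(Z)); a compact sketch of just that expression (assumed shapes: Kmm is m x m, Knm is n x m, q_u_mean is m x d):

import numpy as np

def svgp_predictive_mean(Kmm, Knm, q_u_mean, mf_X=0., mf_Z=0.):
    A = np.linalg.solve(Kmm, Knm.T).T       # A = Knm Kmm^{-1} (Kmm symmetric)
    return mf_X + A.dot(q_u_mean - mf_Z)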
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from posterior import Posterior
|
||||
from .posterior import Posterior
|
||||
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
|
||||
from ...util import diag
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
|
|
@ -170,7 +170,7 @@ class VarDTC(LatentFunctionInference):
|
|||
if VVT_factor.shape[1] == Y.shape[1]:
|
||||
woodbury_vector = Cpsi1Vf # == Cpsi1V
|
||||
else:
|
||||
print 'foobar'
|
||||
print('foobar')
|
||||
import ipdb; ipdb.set_trace()
|
||||
psi1V = np.dot(Y.T*beta, psi1).T
|
||||
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
|
||||
|
|
@ -213,7 +213,7 @@ def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf,
|
|||
dL_dR = None
|
||||
elif het_noise:
|
||||
if uncertain_inputs:
|
||||
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
|
||||
raise NotImplementedError("heteroscedatic derivates with uncertain inputs not implemented")
|
||||
else:
|
||||
#from ...util.linalg import chol_inv
|
||||
#LBi = chol_inv(LB)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from posterior import Posterior
|
||||
from .posterior import Posterior
|
||||
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdinv
|
||||
from ...util import diag
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
|
|
@ -92,7 +92,7 @@ class VarDTC_minibatch(LatentFunctionInference):
|
|||
psi0_full = 0.
|
||||
YRY_full = 0.
|
||||
|
||||
for n_start in xrange(0,num_data,batchsize):
|
||||
for n_start in range(0,num_data,batchsize):
|
||||
n_end = min(batchsize+n_start, num_data)
|
||||
if batchsize==num_data:
|
||||
Y_slice = Y
|
||||
|
|
@@ -169,11 +169,13 @@ class VarDTC_minibatch(LatentFunctionInference):
 
         Kmm = kern.K(Z).copy()
         diag.add(Kmm, self.const_jitter)
-        Lm = jitchol(Kmm, maxtries=100)
+        if not np.isfinite(Kmm).all():
+            print(Kmm)
+        Lm = jitchol(Kmm)
 
         LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
         Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
-        LL = jitchol(Lambda, maxtries=100)
+        LL = jitchol(Lambda)
         logdet_L = 2.*np.sum(np.log(np.diag(LL)))
         b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
         bbt = np.square(b).sum()
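jitchol here is GPy's jittered Cholesky; the change drops the explicit maxtries argument and prints the matrix when it contains non-finite entries. A simplified sketch of the jittering idea, for readers unfamiliar with it (an illustration, not GPy's implementation):

import numpy as np

def jitchol_sketch(A, maxtries=5):
    jitter = np.diag(A).mean() * 1e-6
    for _ in range(maxtries):
        try:
            return np.linalg.cholesky(A + jitter * np.eye(A.shape[0]))
        except np.linalg.LinAlgError:
            jitter *= 10.                   # escalate the jitter and retry
    raise np.linalg.LinAlgError("matrix not positive definite, even with jitter")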
|
|
|
|||
|
|
@@ -1 +1 @@
-from hmc import HMC
+from .hmc import HMC
|
|
|||
|
|
@@ -39,7 +39,7 @@ class HMC:
         :rtype: numpy.ndarray
         """
         params = np.empty((num_samples,self.p.size))
-        for i in xrange(num_samples):
+        for i in range(num_samples):
             self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
             H_old = self._computeH()
             theta_old = self.model.optimizer_array.copy()
@@ -59,7 +59,7 @@ class HMC:
         return params
 
     def _update(self, hmc_iters):
-        for i in xrange(hmc_iters):
+        for i in range(hmc_iters):
             self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
             self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
             self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
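The _update loop above is the standard leapfrog integrator: a half momentum step, a full position step, then another half momentum step. A self-contained sketch of that scheme (grad is assumed to return the gradient of the negative log posterior):

import numpy as np

def leapfrog(theta, p, grad, stepsize, Minv):
    p = p - 0.5 * stepsize * grad(theta)     # half step in momentum
    theta = theta + stepsize * Minv.dot(p)   # full step in position
    p = p - 0.5 * stepsize * grad(theta)     # half step in momentum
    return theta, p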
|
@@ -82,7 +82,7 @@ class HMC_shortcut:
 
     def sample(self, m_iters=1000, hmc_iters=20):
         params = np.empty((m_iters,self.p.size))
-        for i in xrange(m_iters):
+        for i in range(m_iters):
             # sample a stepsize from the uniform distribution
             stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
             self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
|
|
|||
|
|
@@ -9,7 +9,13 @@ import sys
 import re
 import numdifftools as ndt
 import pdb
-import cPickle
+
+try:
+    #In Python 2, cPickle is faster. It does not exist in Python 3 but the underlying code is always used
+    #if available
+    import cPickle as pickle
+except ImportError:
+    import pickle
 
 
 class Metropolis_Hastings:
@@ -40,7 +46,7 @@ class Metropolis_Hastings:
         fcurrent = self.model.log_likelihood() + self.model.log_prior()
         accepted = np.zeros(Ntotal,dtype=np.bool)
         for it in range(Ntotal):
-            print "sample %d of %d\r"%(it,Ntotal),
+            print("sample %d of %d\r"%(it,Ntotal), end=' ')
             sys.stdout.flush()
             prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
             self.model._set_params_transformed(prop)
|
|
|||
|
|
@@ -1,2 +1,2 @@
-from scg import SCG
-from optimization import *
+from .scg import SCG
+from .optimization import *
|
|
|||
|
|
@@ -1,7 +1,7 @@
# Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)

from gradient_descent_update_rules import FletcherReeves, \
from .gradient_descent_update_rules import FletcherReeves, \
    PolakRibiere
from Queue import Empty
from multiprocessing import Value

@@ -74,7 +74,7 @@ class _Async_Optimization(Thread):
if self.outq is not None:
self.outq.put(self.SENTINEL)
if self.messages:
print ""
print("")
self.runsignal.clear()

def run(self, *args, **kwargs):

@@ -213,7 +213,7 @@ class Async_Optimize(object):
# # print "^C"
# self.runsignal.clear()
# c.join()
print "WARNING: callback still running, optimisation done!"
print("WARNING: callback still running, optimisation done!")
return p.result

class CGD(Async_Optimize):

@@ -10,7 +10,7 @@ try:
rasm_available = True
except ImportError:
rasm_available = False
from scg import SCG
from .scg import SCG

class Optimizer():
"""

@@ -54,7 +54,7 @@ class Optimizer():
self.time = str(end - start)

def opt(self, f_fp=None, f=None, fp=None):
raise NotImplementedError, "this needs to be implemented to use the optimizer class"
raise NotImplementedError("this needs to be implemented to use the optimizer class")

def plot(self):
"""

@@ -125,9 +125,9 @@ class opt_lbfgsb(Optimizer):

opt_dict = {}
if self.xtol is not None:
print "WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it"
print("WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it")
if self.ftol is not None:
print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it"
print("WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it")
if self.gtol is not None:
opt_dict['pgtol'] = self.gtol
if self.bfgs_factor is not None:

@@ -140,6 +140,10 @@ class opt_lbfgsb(Optimizer):
self.funct_eval = opt_result[2]['funcalls']
self.status = rcstrings[opt_result[2]['warnflag']]

#a more helpful error message is available in opt_result in the Error case
if opt_result[2]['warnflag']==2:
self.status = 'Error' + opt_result[2]['task']

class opt_simplex(Optimizer):
def __init__(self, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)

@@ -158,7 +162,7 @@ class opt_simplex(Optimizer):
if self.ftol is not None:
opt_dict['ftol'] = self.ftol
if self.gtol is not None:
print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it"
print("WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it")

opt_result = optimize.fmin(f, self.x_init, (), disp=self.messages,
maxfun=self.max_f_eval, full_output=True, **opt_dict)

@@ -186,11 +190,11 @@ class opt_rasm(Optimizer):

opt_dict = {}
if self.xtol is not None:
print "WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it"
print("WARNING: minimize doesn't have an xtol arg, so I'm going to ignore it")
if self.ftol is not None:
print "WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it"
print("WARNING: minimize doesn't have an ftol arg, so I'm going to ignore it")
if self.gtol is not None:
print "WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it"
print("WARNING: minimize doesn't have an gtol arg, so I'm going to ignore it")

opt_result = rasm.minimize(self.x_init, f_fp, (), messages=self.messages,
maxnumfuneval=self.max_f_eval)

@@ -21,14 +21,13 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


from __future__ import print_function
import numpy as np
import sys


def print_out(len_maxiters, fnow, current_grad, beta, iteration):
print '\r',
print '{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
print('\r', end=' ')
print('{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()

def exponents(fnow, current_grad):

@@ -80,7 +79,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,

len_maxiters = len(str(maxiters))
if display:
print ' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters)
print(' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters))
exps = exponents(fnow, current_grad)
p_iter = iteration

@@ -140,7 +139,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
b = np.any(n_exps < exps)
if a or b:
p_iter = iteration
print ''
print('')
if b:
exps = n_exps

@@ -189,6 +188,6 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,

if display:
print_out(len_maxiters, fnow, current_grad, beta, iteration)
print ""
print status
print("")
print(status)
return x, flog, function_eval, status

@@ -30,7 +30,7 @@ class SparseGPMissing(StochasticStorage):
Thus, we can just make sure the loop goes over self.d every
time.
"""
self.d = xrange(model.Y_normalized.shape[1])
self.d = range(model.Y_normalized.shape[1])

class SparseGPStochastics(StochasticStorage):
"""

@ -1,20 +1,23 @@
|
|||
from _src.kern import Kern
|
||||
from _src.rbf import RBF
|
||||
from _src.linear import Linear, LinearFull
|
||||
from _src.static import Bias, White, Fixed
|
||||
from _src.brownian import Brownian
|
||||
from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
||||
from _src.mlp import MLP
|
||||
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
||||
from _src.independent_outputs import IndependentOutputs, Hierarchical
|
||||
from _src.coregionalize import Coregionalize
|
||||
from _src.ODE_UY import ODE_UY
|
||||
from _src.ODE_UYC import ODE_UYC
|
||||
from _src.ODE_st import ODE_st
|
||||
from _src.ODE_t import ODE_t
|
||||
from _src.poly import Poly
|
||||
from _src.eq_ode2 import EQ_ODE2
|
||||
from ._src.kern import Kern
|
||||
from ._src.rbf import RBF
|
||||
from ._src.linear import Linear, LinearFull
|
||||
from ._src.static import Bias, White, Fixed
|
||||
from ._src.brownian import Brownian
|
||||
from ._src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
|
||||
from ._src.mlp import MLP
|
||||
from ._src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
|
||||
from ._src.independent_outputs import IndependentOutputs, Hierarchical
|
||||
from ._src.coregionalize import Coregionalize
|
||||
from ._src.ODE_UY import ODE_UY
|
||||
from ._src.ODE_UYC import ODE_UYC
|
||||
from ._src.ODE_st import ODE_st
|
||||
from ._src.ODE_t import ODE_t
|
||||
from ._src.poly import Poly
|
||||
from ._src.eq_ode2 import EQ_ODE2
|
||||
from ._src.trunclinear import TruncLinear,TruncLinear_inf
|
||||
from ._src.splitKern import SplitKern,DEtime
|
||||
from ._src.splitKern import DEtime as DiffGenomeKern
|
||||
|
||||
from _src.trunclinear import TruncLinear,TruncLinear_inf
|
||||
from _src.splitKern import SplitKern,DiffGenomeKern
|
||||
|
||||
from _src.basis_funcs import LinearSlopeBasisFuncKernel, BasisFuncKernel, ChangePointBasisFuncKernel, DomainKernel
|
||||
|
||||
|
|
|
|||
|
|
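The kern/__init__.py hunk above switches every kernel export to an explicit relative import, so the same classes remain available at package level under Python 3. A short usage sketch of composing the exported kernels (hyperparameter values are arbitrary, and the exact constructor defaults may differ in this devel snapshot):

import numpy as np
import GPy

k = GPy.kern.RBF(input_dim=1, lengthscale=2.) + GPy.kern.White(1, variance=1e-2)
X = np.linspace(0., 10., 50)[:, None]
K = k.K(X)   # 50x50 covariance of the summed kernel
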
@ -1,11 +1,11 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
import numpy as np
|
||||
from independent_outputs import index_to_slices
|
||||
from .independent_outputs import index_to_slices
|
||||
|
||||
class ODE_UY(Kern):
|
||||
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'):
|
||||
|
|
@ -114,7 +114,7 @@ class ODE_UY(Kern):
|
|||
elif i==1:
|
||||
Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
|
||||
else:
|
||||
raise ValueError, "invalid input/output index"
|
||||
raise ValueError("invalid input/output index")
|
||||
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
|
||||
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
|
||||
return Kdiag
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
import numpy as np
|
||||
from independent_outputs import index_to_slices
|
||||
from .independent_outputs import index_to_slices
|
||||
|
||||
class ODE_UYC(Kern):
|
||||
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'):
|
||||
|
|
@ -115,7 +115,7 @@ class ODE_UYC(Kern):
|
|||
elif i==1:
|
||||
Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
|
||||
else:
|
||||
raise ValueError, "invalid input/output index"
|
||||
raise ValueError("invalid input/output index")
|
||||
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
|
||||
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
|
||||
return Kdiag
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
import numpy as np
|
||||
from independent_outputs import index_to_slices
|
||||
from .independent_outputs import index_to_slices
|
||||
|
||||
|
||||
class ODE_st(Kern):
|
||||
|
|
@ -135,7 +135,7 @@ class ODE_st(Kern):
|
|||
Kdiag[s1]+= b**2*k1 - 2*a*c*k2 + a**2*k3 + c**2*vyt*vyx
|
||||
#Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
|
||||
else:
|
||||
raise ValueError, "invalid input/output index"
|
||||
raise ValueError("invalid input/output index")
|
||||
|
||||
return Kdiag
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
import numpy as np
|
||||
from independent_outputs import index_to_slices
|
||||
from .independent_outputs import index_to_slices
|
||||
|
||||
|
||||
class ODE_t(Kern):
|
||||
|
|
@ -85,7 +85,7 @@ class ODE_t(Kern):
|
|||
Kdiag[s1]+= k1 + vyt+self.ubias
|
||||
#Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
|
||||
else:
|
||||
raise ValueError, "invalid input/output index"
|
||||
raise ValueError("invalid input/output index")
|
||||
|
||||
return Kdiag
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
import numpy as np
|
||||
import itertools
|
||||
from ...util.caching import Cache_this
|
||||
from kern import CombinationKernel
|
||||
from .kern import CombinationKernel
|
||||
from functools import reduce
|
||||
|
||||
class Add(CombinationKernel):
|
||||
"""
|
||||
|
|
@ -84,10 +85,10 @@ class Add(CombinationKernel):
|
|||
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
|
||||
#return psi2
|
||||
# compute the "cross" terms
|
||||
from static import White, Bias
|
||||
from rbf import RBF
|
||||
from .static import White, Bias
|
||||
from .rbf import RBF
|
||||
#from rbf_inv import RBFInv
|
||||
from linear import Linear
|
||||
from .linear import Linear
|
||||
#ffrom fixed import Fixed
|
||||
|
||||
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||
|
|
@ -111,11 +112,11 @@ class Add(CombinationKernel):
|
|||
psi2 += np.einsum('nm,no->mo',tmp1,tmp2)+np.einsum('nm,no->mo',tmp2,tmp1)
|
||||
#(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
|
||||
else:
|
||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||
raise NotImplementedError("psi2 cannot be computed for this kernel")
|
||||
return psi2
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
from .static import White, Bias
|
||||
for p1 in self.parts:
|
||||
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
|
|
@ -131,7 +132,7 @@ class Add(CombinationKernel):
|
|||
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
|
||||
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
from .static import White, Bias
|
||||
target = np.zeros(Z.shape)
|
||||
for p1 in self.parts:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
|
|
@ -149,7 +150,7 @@ class Add(CombinationKernel):
|
|||
return target
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
from .static import White, Bias
|
||||
target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
|
||||
for p1 in self.parameters:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
|
|
@ -164,7 +165,7 @@ class Add(CombinationKernel):
|
|||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
|
||||
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
[np.add(target_grads[i],grads[i],target_grads[i]) for i in xrange(len(grads))]
|
||||
[np.add(target_grads[i],grads[i],target_grads[i]) for i in range(len(grads))]
|
||||
return target_grads
|
||||
|
||||
def add(self, other):
|
||||
|
|
@ -180,9 +181,12 @@ class Add(CombinationKernel):
|
|||
|
||||
def input_sensitivity(self, summarize=True):
|
||||
if summarize:
|
||||
return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts])
|
||||
i_s = np.zeros((self.input_dim))
|
||||
for k in self.parts:
|
||||
i_s[k.active_dims] += k.input_sensitivity(summarize)
|
||||
return i_s
|
||||
else:
|
||||
i_s = np.zeros((len(self.parts), self.input_dim))
|
||||
from operator import setitem
|
||||
[setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
|
||||
[setitem(i_s, (i, k.active_dims), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
|
||||
return i_s
|
||||
|
|
|
|||
183 GPy/kern/_src/basis_funcs.py (new file)
|
|
@ -0,0 +1,183 @@
|
|||
# #Copyright (c) 2012, Max Zwiessele (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
from .kern import Kern
|
||||
from ...core.parameterization.param import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
import numpy as np
|
||||
from ...util.caching import Cache_this
|
||||
from ...util.linalg import tdot, mdot
|
||||
|
||||
class BasisFuncKernel(Kern):
|
||||
def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
|
||||
"""
|
||||
Abstract superclass for kernels with explicit basis functions for use in GPy.
|
||||
|
||||
This class does NOT automatically add an offset to the design matrix phi!
|
||||
"""
|
||||
super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
|
||||
self.ARD = ARD
|
||||
if self.ARD:
|
||||
phi_test = self._phi(np.random.normal(0, 1, (1, self.input_dim)))
|
||||
variance = variance * np.ones(phi_test.shape[1])
|
||||
else:
|
||||
variance = np.array(variance)
|
||||
self.variance = Param('variance', variance, Logexp())
|
||||
self.link_parameter(self.variance)
|
||||
|
||||
def parameters_changed(self):
|
||||
self.alpha = np.sqrt(self.variance)
|
||||
self.beta = 1./self.variance
|
||||
|
||||
@Cache_this(limit=3, ignore_args=())
|
||||
def phi(self, X):
|
||||
return self._phi(X)
|
||||
|
||||
def _phi(self, X):
|
||||
raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
|
||||
|
||||
def K(self, X, X2=None):
|
||||
return self._K(X, X2)
|
||||
|
||||
def Kdiag(self, X, X2=None):
|
||||
return np.diag(self._K(X, X2))
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
if self.ARD:
|
||||
phi1 = self.phi(X)
|
||||
if X2 is None or X is X2:
|
||||
self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi1)
|
||||
else:
|
||||
phi2 = self.phi(X2)
|
||||
self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
|
||||
else:
|
||||
self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
if self.ARD:
|
||||
phi1 = self.phi(X)
|
||||
self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
|
||||
else:
|
||||
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
|
||||
|
||||
def concatenate_offset(self, X):
|
||||
return np.c_[np.ones((X.shape[0], 1)), X]
|
||||
|
||||
def posterior_inf(self, X=None, posterior=None):
|
||||
"""
|
||||
Do the posterior inference on the parameters given this kernels functions
|
||||
and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model.
|
||||
If not given we search for the the highest parent to be a model, containing the posterior, and for X accordingly.
|
||||
"""
|
||||
if X is None:
|
||||
try:
|
||||
X = self._highest_parent_.X
|
||||
except NameError:
|
||||
raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
|
||||
if posterior is None:
|
||||
try:
|
||||
posterior = self._highest_parent_.posterior
|
||||
except NameError:
|
||||
raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
|
||||
phi_alpha = self.phi(X) * self.variance
|
||||
return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
|
||||
|
||||
@Cache_this(limit=3, ignore_args=())
|
||||
def _K(self, X, X2):
|
||||
if X2 is None or X is X2:
|
||||
phi = self.phi(X) * self.alpha
|
||||
if phi.ndim != 2:
|
||||
phi = phi[:, None]
|
||||
return tdot(phi)
|
||||
else:
|
||||
phi1 = self.phi(X) * self.alpha
|
||||
phi2 = self.phi(X2) * self.alpha
|
||||
if phi1.ndim != 2:
|
||||
phi1 = phi1[:, None]
|
||||
phi2 = phi2[:, None]
|
||||
return phi1.dot(phi2.T)
|
||||
|
||||
|
||||
class LinearSlopeBasisFuncKernel(BasisFuncKernel):
|
||||
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
|
||||
"""
|
||||
A linear segment transformation. The segments start at start, \
|
||||
are then linear to stop and constant again. The segments are
|
||||
normalized, so that they have exactly as much mass above
|
||||
as below the origin.
|
||||
|
||||
Start and stop can be tuples or lists of starts and stops.
|
||||
Behaviour of start stop is as np.where(X<start) would do.
|
||||
"""
|
||||
|
||||
self.start = np.array(start)
|
||||
self.stop = np.array(stop)
|
||||
super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
|
||||
|
||||
@Cache_this(limit=3, ignore_args=())
|
||||
def _phi(self, X):
|
||||
phi = np.where(X < self.start, self.start, X)
|
||||
phi = np.where(phi > self.stop, self.stop, phi)
|
||||
return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
|
||||
|
||||
class ChangePointBasisFuncKernel(BasisFuncKernel):
|
||||
def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
|
||||
self.changepoint = np.array(changepoint)
|
||||
super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
|
||||
|
||||
@Cache_this(limit=3, ignore_args=())
|
||||
def _phi(self, X):
|
||||
return np.where((X < self.changepoint), -1, 1)
|
||||
|
||||
class DomainKernel(LinearSlopeBasisFuncKernel):
|
||||
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
|
||||
super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
|
||||
|
||||
@Cache_this(limit=3, ignore_args=())
|
||||
def _phi(self, X):
|
||||
phi = np.where((X>self.start)*(X<self.stop), 1, 0)
|
||||
return phi#((phi-self.start)/(self.stop-self.start))-.5
|
||||
|
||||
class LogisticBasisFuncKernel(BasisFuncKernel):
|
||||
def __init__(self, input_dim, centers, variance=1., slope=1., active_dims=None, ARD=False, ARD_slope=True, name='logistic'):
|
||||
self.centers = np.atleast_2d(centers)
|
||||
self.ARD_slope = ARD_slope
|
||||
if self.ARD_slope:
|
||||
self.slope = Param('slope', slope * np.ones(self.centers.size), Logexp())
|
||||
else:
|
||||
self.slope = Param('slope', slope, Logexp())
|
||||
super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
|
||||
self.link_parameter(self.slope)
|
||||
|
||||
@Cache_this(limit=3, ignore_args=())
|
||||
def _phi(self, X):
|
||||
import scipy as sp
|
||||
phi = 1/(1+np.exp(-((X-self.centers)*self.slope)))
|
||||
return np.where(np.isnan(phi), 0, phi)#((phi-self.start)/(self.stop-self.start))-.5
|
||||
|
||||
def parameters_changed(self):
|
||||
BasisFuncKernel.parameters_changed(self)
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
|
||||
if X2 is None or X is X2:
|
||||
phi1 = self.phi(X)
|
||||
if phi1.ndim != 2:
|
||||
phi1 = phi1[:, None]
|
||||
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
|
||||
if self.ARD_slope:
|
||||
self.slope.gradient = self.variance * 2 * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi1_dl)
|
||||
else:
|
||||
self.slope.gradient = self.variance * 2 * (dL_dK * phi1.dot(dphi1_dl.T)).sum()
|
||||
else:
|
||||
phi1 = self.phi(X)
|
||||
phi2 = self.phi(X2)
|
||||
if phi1.ndim != 2:
|
||||
phi1 = phi1[:, None]
|
||||
phi2 = phi2[:, None]
|
||||
dphi1_dl = (phi1**2) * (np.exp(-((X-self.centers)*self.slope)) * (X-self.centers))
|
||||
dphi2_dl = (phi2**2) * (np.exp(-((X2-self.centers)*self.slope)) * (X2-self.centers))
|
||||
if self.ARD_slope:
|
||||
self.slope.gradient = (self.variance * np.einsum('ij,iq,jq->q', dL_dK, phi1, dphi2_dl) + np.einsum('ij,iq,jq->q', dL_dK, phi2, dphi1_dl))
|
||||
else:
|
||||
self.slope.gradient = self.variance * (dL_dK * phi1.dot(dphi2_dl.T)).sum() + (dL_dK * phi2.dot(dphi1_dl.T)).sum()
|
||||
self.slope.gradient = np.where(np.isnan(self.slope.gradient), 0, self.slope.gradient)
|
||||
|
|
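The _phi of LinearSlopeBasisFuncKernel in the new file above clips the input to the [start, stop] window and recentres it, so each basis function is a ramp that is constant outside its segment. The same mapping in isolation (values chosen arbitrarily for illustration):

import numpy as np

start, stop = 2., 5.
X = np.linspace(0., 8., 9)[:, None]
phi = np.where(X < start, start, X)
phi = np.where(phi > stop, stop, phi)
phi = phi - (stop + start) / 2.   # -1.5 below start, 0 at the segment midpoint, +1.5 above stop
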
@ -1,7 +1,7 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
import numpy as np
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
# Copyright (c) 2012, James Hensman and Ricardo Andrade
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
import numpy as np
|
||||
from scipy import weave
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
from ...util.config import config # for assesing whether to use weave
|
||||
from ...util.config import config # for assesing whether to use cython
|
||||
import coregionalize_cython
|
||||
|
||||
class Coregionalize(Kern):
|
||||
"""
|
||||
|
|
@ -57,13 +57,8 @@ class Coregionalize(Kern):
|
|||
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
|
||||
|
||||
def K(self, X, X2=None):
|
||||
if config.getboolean('weave', 'working'):
|
||||
try:
|
||||
return self._K_weave(X, X2)
|
||||
except:
|
||||
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
|
||||
config.set('weave', 'working', 'False')
|
||||
return self._K_numpy(X, X2)
|
||||
if config.getboolean('cython', 'working'):
|
||||
return self._K_cython(X, X2)
|
||||
else:
|
||||
return self._K_numpy(X, X2)
|
||||
|
||||
|
|
@ -76,36 +71,10 @@ class Coregionalize(Kern):
|
|||
index2 = np.asarray(X2, dtype=np.int)
|
||||
return self.B[index,index2.T]
|
||||
|
||||
def _K_weave(self, X, X2=None):
|
||||
"""compute the kernel function using scipy.weave"""
|
||||
index = np.asarray(X, dtype=np.int)
|
||||
|
||||
def _K_cython(self, X, X2=None):
|
||||
if X2 is None:
|
||||
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
|
||||
code="""
|
||||
for(int i=0;i<N; i++){
|
||||
target[i+i*N] = B[index[i]+output_dim*index[i]];
|
||||
for(int j=0; j<i; j++){
|
||||
target[j+i*N] = B[index[i]+output_dim*index[j]];
|
||||
target[i+j*N] = target[j+i*N];
|
||||
}
|
||||
}
|
||||
"""
|
||||
N, B, output_dim = index.size, self.B, self.output_dim
|
||||
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
|
||||
else:
|
||||
index2 = np.asarray(X2, dtype=np.int)
|
||||
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
|
||||
code="""
|
||||
for(int i=0;i<num_inducing; i++){
|
||||
for(int j=0; j<N; j++){
|
||||
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
|
||||
}
|
||||
}
|
||||
"""
|
||||
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
|
||||
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
|
||||
return target
|
||||
return coregionalize_cython.K_symmetric(self.B, np.asarray(X, dtype=np.int64)[:,0])
|
||||
return coregionalize_cython.K_asymmetric(self.B, np.asarray(X, dtype=np.int64)[:,0], np.asarray(X2, dtype=np.int64)[:,0])
|
||||
|
||||
|
||||
def Kdiag(self, X):
|
||||
|
|
@ -118,19 +87,13 @@ class Coregionalize(Kern):
|
|||
else:
|
||||
index2 = np.asarray(X2, dtype=np.int)
|
||||
|
||||
#attempt to use weave for a nasty double indexing loop: fall back to numpy
|
||||
if config.getboolean('weave', 'working'):
|
||||
try:
|
||||
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
|
||||
except:
|
||||
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
|
||||
config.set('weave', 'working', 'False')
|
||||
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
|
||||
#attempt to use cython for a nasty double indexing loop: fall back to numpy
|
||||
if config.getboolean('cython', 'working'):
|
||||
dL_dK_small = self._gradient_reduce_cython(dL_dK, index, index2)
|
||||
else:
|
||||
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
|
||||
|
||||
|
||||
|
||||
dkappa = np.diag(dL_dK_small)
|
||||
dL_dK_small += dL_dK_small.T
|
||||
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
|
||||
|
|
@ -138,19 +101,6 @@ class Coregionalize(Kern):
|
|||
self.W.gradient = dW
|
||||
self.kappa.gradient = dkappa
|
||||
|
||||
def _gradient_reduce_weave(self, dL_dK, index, index2):
|
||||
dL_dK_small = np.zeros_like(self.B)
|
||||
code="""
|
||||
for(int i=0; i<num_inducing; i++){
|
||||
for(int j=0; j<N; j++){
|
||||
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
|
||||
}
|
||||
}
|
||||
"""
|
||||
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
|
||||
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
|
||||
return dL_dK_small
|
||||
|
||||
def _gradient_reduce_numpy(self, dL_dK, index, index2):
|
||||
index, index2 = index[:,0], index2[:,0]
|
||||
dL_dK_small = np.zeros_like(self.B)
|
||||
|
|
@ -160,9 +110,14 @@ class Coregionalize(Kern):
|
|||
dL_dK_small[j,i] = tmp1[:,index2==j].sum()
|
||||
return dL_dK_small
|
||||
|
||||
def _gradient_reduce_cython(self, dL_dK, index, index2):
|
||||
index, index2 = index[:,0], index2[:,0]
|
||||
return coregionalize_cython.gradient_reduce(self.B.shape[0], dL_dK, index, index2)
|
||||
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
index = np.asarray(X, dtype=np.int).flatten()
|
||||
dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in xrange(self.output_dim)])
|
||||
dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in range(self.output_dim)])
|
||||
self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
|
||||
self.kappa.gradient = dL_dKdiag_small
|
||||
|
||||
|
|
|
|||
6724 GPy/kern/_src/coregionalize_cython.c (new file; diff suppressed because it is too large)
34 GPy/kern/_src/coregionalize_cython.pyx (new file)
|
|
@@ -0,0 +1,34 @@
#cython: boundscheck=True
#cython: wraparound=True
import cython
import numpy as np
cimport numpy as np

def K_symmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X):
    cdef int N = X.size
    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, N))
    for n in range(N):
        for m in range(N):
            K[n,m] = B[X[n],X[m]]
    return K

def K_asymmetric(np.ndarray[double, ndim=2] B, np.ndarray[np.int64_t, ndim=1] X, np.ndarray[np.int64_t, ndim=1] X2):
    cdef int N = X.size
    cdef int M = X2.size
    cdef np.ndarray[np.double_t, ndim=2] K = np.empty((N, M))
    for n in range(N):
        for m in range(M):
            K[n,m] = B[X[n],X2[m]]
    return K

def gradient_reduce(int D, np.ndarray[double, ndim=2] dL_dK, np.ndarray[np.int64_t, ndim=1] index, np.ndarray[np.int64_t, ndim=1] index2):
    cdef np.ndarray[np.double_t, ndim=2] dL_dK_small = np.zeros((D, D))
    cdef int N = index.size
    cdef int M = index2.size
    for i in range(N):
        for j in range(M):
            dL_dK_small[index2[j],index[i]] += dL_dK[i,j];
    return dL_dK_small

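For reference, K_symmetric above is just fancy indexing of the coregionalisation matrix B by the integer output index; the Cython loops only avoid the temporaries numpy would allocate. A quick consistency check one could run once the extension is compiled (the data here are made up):

import numpy as np
# import coregionalize_cython   (after building the extension in place)

B = np.array([[1.0, 0.3], [0.3, 2.0]])
X = np.array([0, 0, 1], dtype=np.int64)
expected = B[X][:, X]                      # numpy reference
# coregionalize_cython.K_symmetric(B, X) should equal `expected`
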
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
import numpy as np
|
||||
from scipy.special import wofz
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
from ...util.caching import Cache_this
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kern import Kern, CombinationKernel
|
||||
from .kern import Kern, CombinationKernel
|
||||
import numpy as np
|
||||
import itertools
|
||||
|
||||
|
|
@ -94,14 +94,18 @@ class IndependentOutputs(CombinationKernel):
|
|||
else:
|
||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
|
||||
if self.single_kern: kern.gradient = target
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||
if self.single_kern:
|
||||
self.kern.gradient = target
|
||||
else:
|
||||
[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||
|
||||
def gradients_X(self,dL_dK, X, X2=None):
|
||||
target = np.zeros(X.shape)
|
||||
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
|
||||
if X2 is None:
|
||||
# TODO: make use of index_to_slices
|
||||
# FIXME: Broken as X is already sliced out
|
||||
print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
|
||||
values = np.unique(X[:,self.index_dim])
|
||||
slices = [X[:,self.index_dim]==i for i in values]
|
||||
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
|
||||
|
|
@ -142,7 +146,7 @@ class IndependentOutputs(CombinationKernel):
|
|||
if self.single_kern: target[:] += kern.gradient
|
||||
else: target[i][:] += kern.gradient
|
||||
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))]
|
||||
if self.single_kern: kern.gradient = target
|
||||
if self.single_kern: self.kern.gradient = target
|
||||
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
|
||||
|
||||
class Hierarchical(CombinationKernel):
|
||||
|
|
|
|||
|
|
@ -4,17 +4,20 @@
|
|||
import sys
|
||||
import numpy as np
|
||||
from ...core.parameterization.parameterized import Parameterized
|
||||
from kernel_slice_operations import KernCallsViaSlicerMeta
|
||||
from .kernel_slice_operations import KernCallsViaSlicerMeta
|
||||
from ...util.caching import Cache_this
|
||||
from GPy.core.parameterization.observable_array import ObsAr
|
||||
from functools import reduce
|
||||
import six
|
||||
|
||||
|
||||
|
||||
@six.add_metaclass(KernCallsViaSlicerMeta)
|
||||
class Kern(Parameterized):
|
||||
#===========================================================================
|
||||
# This adds input slice support. The rather ugly code for slicing can be
|
||||
# found in kernel_slice_operations
|
||||
__metaclass__ = KernCallsViaSlicerMeta
|
||||
# __meataclass__ is ignored in Python 3 - needs to be put in the function definiton
|
||||
#__metaclass__ = KernCallsViaSlicerMeta
|
||||
#Here, we use the Python module six to support Py3 and Py2 simultaneously
|
||||
#===========================================================================
|
||||
_support_GPU=False
|
||||
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
|
||||
|
|
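As the comment in the kern.py hunk above notes, a bare __metaclass__ attribute is silently ignored under Python 3, which is why the class is now decorated with six.add_metaclass. A minimal sketch of the pattern with a toy metaclass (not GPy's slicer metaclass):

import six

class Registering(type):
    registry = []
    def __new__(mcs, name, bases, attrs):
        cls = super(Registering, mcs).__new__(mcs, name, bases, attrs)
        mcs.registry.append(name)   # record every class built with this metaclass
        return cls

@six.add_metaclass(Registering)      # equivalent to __metaclass__ on Python 2, works on both
class MyKern(object):
    pass
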
@ -178,7 +181,7 @@ class Kern(Parameterized):
|
|||
|
||||
"""
|
||||
assert isinstance(other, Kern), "only kernels can be added to kernels..."
|
||||
from add import Add
|
||||
from .add import Add
|
||||
return Add([self, other], name=name)
|
||||
|
||||
def __mul__(self, other):
|
||||
|
|
@ -210,7 +213,7 @@ class Kern(Parameterized):
|
|||
|
||||
"""
|
||||
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
|
||||
from prod import Prod
|
||||
from .prod import Prod
|
||||
#kernels = []
|
||||
#if isinstance(self, Prod): kernels.extend(self.parameters)
|
||||
#else: kernels.append(self)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
|
||||
import numpy as np
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...util.linalg import tdot
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
import numpy as np
|
||||
|
|
|
|||
|
|
@ -3,11 +3,12 @@
|
|||
|
||||
|
||||
import numpy as np
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...util.linalg import mdot
|
||||
from ...util.decorators import silence_errors
|
||||
from ...core.parameterization.param import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
from functools import reduce
|
||||
|
||||
class Periodic(Kern):
|
||||
def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name):
|
||||
|
|
@ -67,8 +68,6 @@ class Periodic(Kern):
|
|||
return np.diag(self.K(X))
|
||||
|
||||
|
||||
|
||||
|
||||
class PeriodicExponential(Periodic):
|
||||
"""
|
||||
Kernel of the periodic subspace (up to a given frequency) of a exponential
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
class Poly(Kern):
|
||||
|
|
|
|||
|
|
@ -2,9 +2,24 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from kern import CombinationKernel
|
||||
from .kern import CombinationKernel
|
||||
from ...util.caching import Cache_this
|
||||
import itertools
|
||||
from functools import reduce
|
||||
|
||||
|
||||
def numpy_invalid_op_as_exception(func):
|
||||
"""
|
||||
A decorator that allows catching numpy invalid operations
|
||||
as exceptions (the default behaviour is raising warnings).
|
||||
"""
|
||||
def func_wrapper(*args, **kwargs):
|
||||
np.seterr(invalid='raise')
|
||||
result = func(*args, **kwargs)
|
||||
np.seterr(invalid='warn')
|
||||
return result
|
||||
return func_wrapper
|
||||
|
||||
|
||||
class Prod(CombinationKernel):
|
||||
"""
|
||||
|
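The new numpy_invalid_op_as_exception decorator above temporarily switches numpy's 'invalid' floating-point handling to raise, so a silent nan becomes a hard error inside the wrapped call. A hedged usage sketch (the decorated function is illustrative only):

import numpy as np

@numpy_invalid_op_as_exception
def normalised(x):
    return x / x.sum()   # 0/0 raises FloatingPointError under the decorator instead of warning

# normalised(np.zeros(3))  ->  FloatingPointError
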
|
@ -46,18 +61,20 @@ class Prod(CombinationKernel):
|
|||
self.parts[0].update_gradients_full(dL_dK*self.parts[1].K(X,X2), X, X2)
|
||||
self.parts[1].update_gradients_full(dL_dK*self.parts[0].K(X,X2), X, X2)
|
||||
else:
|
||||
k = self.K(X,X2)*dL_dK
|
||||
for p in self.parts:
|
||||
p.update_gradients_full(k/p.K(X,X2),X,X2)
|
||||
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
|
||||
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
|
||||
to_update = list(set(self.parts) - set(combination))[0]
|
||||
to_update.update_gradients_full(dL_dK * prod, X, X2)
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
if len(self.parts)==2:
|
||||
self.parts[0].update_gradients_diag(dL_dKdiag*self.parts[1].Kdiag(X), X)
|
||||
self.parts[1].update_gradients_diag(dL_dKdiag*self.parts[0].Kdiag(X), X)
|
||||
else:
|
||||
k = self.Kdiag(X)*dL_dKdiag
|
||||
for p in self.parts:
|
||||
p.update_gradients_diag(k/p.Kdiag(X),X)
|
||||
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
|
||||
prod = reduce(np.multiply, [p.Kdiag(X) for p in combination])
|
||||
to_update = list(set(self.parts) - set(combination))[0]
|
||||
to_update.update_gradients_diag(dL_dKdiag * prod, X)
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
target = np.zeros(X.shape)
|
||||
|
|
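The rewritten Prod.update_gradients_full above no longer divides the full product kernel by each part's K (which breaks when a factor evaluates to zero); instead it multiplies together every factor except the one being updated. The same leave-one-out idea in isolation, with plain arrays standing in for each part's Gram matrix:

import itertools
from functools import reduce
import numpy as np

parts = [np.random.rand(4, 4) for _ in range(3)]   # stand-ins for each part's K(X, X2)
for i in range(len(parts)):
    others = [K for j, K in enumerate(parts) if j != i]
    prod = reduce(np.multiply, others)             # product of all factors except part i
    # dL_dK * prod is what the corresponding part's update_gradients_full receives
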
@ -65,9 +82,10 @@ class Prod(CombinationKernel):
|
|||
target += self.parts[0].gradients_X(dL_dK*self.parts[1].K(X, X2), X, X2)
|
||||
target += self.parts[1].gradients_X(dL_dK*self.parts[0].K(X, X2), X, X2)
|
||||
else:
|
||||
k = self.K(X,X2)*dL_dK
|
||||
for p in self.parts:
|
||||
target += p.gradients_X(k/p.K(X,X2),X,X2)
|
||||
for combination in itertools.combinations(self.parts, len(self.parts) - 1):
|
||||
prod = reduce(np.multiply, [p.K(X, X2) for p in combination])
|
||||
to_update = list(set(self.parts) - set(combination))[0]
|
||||
target += to_update.gradients_X(dL_dK * prod, X, X2)
|
||||
return target
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
|
|
@ -80,3 +98,5 @@ class Prod(CombinationKernel):
|
|||
for p in self.parts:
|
||||
target += p.gradients_X_diag(k/p.Kdiag(X),X)
|
||||
return target
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@
|
|||
from ....core.parameterization.parameter_core import Pickleable
|
||||
from GPy.util.caching import Cache_this
|
||||
from ....core.parameterization import variational
|
||||
import rbf_psi_comp
|
||||
import ssrbf_psi_comp
|
||||
import sslinear_psi_comp
|
||||
import linear_psi_comp
|
||||
from . import rbf_psi_comp
|
||||
from . import ssrbf_psi_comp
|
||||
from . import sslinear_psi_comp
|
||||
from . import linear_psi_comp
|
||||
|
||||
class PSICOMP_RBF(Pickleable):
|
||||
@Cache_this(limit=2, ignore_args=(0,))
|
||||
|
|
@ -17,7 +17,7 @@ class PSICOMP_RBF(Pickleable):
|
|||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||
raise ValueError("unknown distriubtion received for psi-statistics")
|
||||
|
||||
@Cache_this(limit=2, ignore_args=(0,1,2,3))
|
||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
|
|
@ -26,7 +26,7 @@ class PSICOMP_RBF(Pickleable):
|
|||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||
raise ValueError("unknown distriubtion received for psi-statistics")
|
||||
|
||||
def _setup_observers(self):
|
||||
pass
|
||||
|
|
@ -40,7 +40,7 @@ class PSICOMP_Linear(Pickleable):
|
|||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
|
||||
else:
|
||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||
raise ValueError("unknown distriubtion received for psi-statistics")
|
||||
|
||||
@Cache_this(limit=2, ignore_args=(0,1,2,3))
|
||||
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
|
||||
|
|
@ -49,7 +49,7 @@ class PSICOMP_Linear(Pickleable):
|
|||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
|
||||
else:
|
||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||
raise ValueError("unknown distriubtion received for psi-statistics")
|
||||
|
||||
def _setup_observers(self):
|
||||
pass
|
||||
|
|
@ -37,11 +37,11 @@ def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variati
|
|||
|
||||
# Compute for psi0 and psi1
|
||||
mu2S = np.square(mu)+S
|
||||
dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu)
|
||||
dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu)
|
||||
dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z)
|
||||
dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance)
|
||||
dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu)
|
||||
dL_dvar += (dL_dpsi0[:,None]*gamma*mu2S).sum(axis=0) + (dL_dpsi1.T.dot(gamma*mu)*Z).sum(axis=0)
|
||||
dL_dgamma += dL_dpsi0[:,None]*variance*mu2S+ dL_dpsi1.dot(Z)*mu*variance
|
||||
dL_dmu += dL_dpsi0[:,None]*2.*variance*gamma*mu + dL_dpsi1.dot(Z)*gamma*variance
|
||||
dL_dS += dL_dpsi0[:,None]*variance*gamma
|
||||
dL_dZ += dL_dpsi1.T.dot(gamma*mu)*variance
|
||||
|
||||
return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma
|
||||
|
||||
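The hunk above trades several np.einsum contractions for equivalent dot/broadcast expressions, which dispatch to BLAS and avoid einsum's overhead. The kind of identity being relied on, checked numerically on random data:

import numpy as np

dL_dpsi1 = np.random.rand(5, 3)                           # N x M
gamma, mu = np.random.rand(5, 2), np.random.rand(5, 2)    # N x Q
Z = np.random.rand(3, 2)                                  # M x Q
a = np.einsum('nm,nq,mq,nq->q', dL_dpsi1, gamma, Z, mu)
b = (dL_dpsi1.T.dot(gamma * mu) * Z).sum(axis=0)
assert np.allclose(a, b)
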
|
|
@ -64,29 +64,23 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
|
|||
gamma2 = np.square(gamma)
|
||||
variance2 = np.square(variance)
|
||||
mu2S = mu2+S # NxQ
|
||||
gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance)
|
||||
common_sum = np.einsum('nq,mq->nm',gvm,Z)
|
||||
# common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
|
||||
Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z)
|
||||
gvm = gamma*mu*variance
|
||||
common_sum = gvm.dot(Z.T)
|
||||
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
|
||||
Z_expect_var2 = Z_expect*variance2
|
||||
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
|
||||
tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z)
|
||||
common_expect = np.einsum('mq,nm->nq',tmp,common_sum)
|
||||
# common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum)
|
||||
Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum)
|
||||
Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z)
|
||||
common_expect = common_sum.dot(dL_dpsi2T).dot(Z)
|
||||
Z2_expect = common_sum.dot(dL_dpsi2T)
|
||||
Z1_expect = dL_dpsi2T.dot(Z)
|
||||
|
||||
dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\
|
||||
np.einsum('nq,nq,nq->q',common_expect,gamma,mu)
|
||||
dL_dvar = variance*Z_expect*2.*(gamma*mu2S-gamma2*mu2).sum(axis=0)+(common_expect*gamma*mu).sum(axis=0)
|
||||
|
||||
dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\
|
||||
np.einsum('nq,q,nq->nq',common_expect,variance,mu)
|
||||
dL_dgamma = Z_expect_var2*(mu2S-2.*gamma*mu2)+common_expect*mu*variance
|
||||
|
||||
dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\
|
||||
np.einsum('nq,nq,q->nq',common_expect,gamma,variance)
|
||||
dL_dmu = Z_expect_var2*mu*2.*(gamma-gamma2) + common_expect*gamma*variance
|
||||
|
||||
dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2)
|
||||
dL_dS = gamma*Z_expect_var2
|
||||
|
||||
# dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum))
|
||||
dL_dZ = Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect)
|
||||
dL_dZ = (gamma*(mu2S-gamma*mu2)).sum(axis=0)*variance2*Z1_expect+ Z2_expect.T.dot(gamma*mu)*variance
|
||||
|
||||
return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ
|
||||
|
|
|
|||
|
|
@ -22,12 +22,14 @@ try:
|
|||
# _psi1 NxM
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
|
||||
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
|
||||
l2 = np.square(lengthscale)
|
||||
log_denom1 = np.log(S/l2+1)
|
||||
log_denom2 = np.log(2*S/l2+1)
|
||||
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
|
||||
log_gamma = np.log(gamma)
|
||||
log_gamma1 = np.log(1.-gamma)
|
||||
variance = float(variance)
|
||||
psi0 = np.empty(N)
|
||||
psi0[:] = variance
|
||||
|
|
@ -37,6 +39,7 @@ try:
|
|||
from ....util.misc import param_to_array
|
||||
S = param_to_array(S)
|
||||
mu = param_to_array(mu)
|
||||
gamma = param_to_array(gamma)
|
||||
Z = param_to_array(Z)
|
||||
|
||||
support_code = """
|
||||
|
|
@ -79,7 +82,7 @@ try:
|
|||
}
|
||||
}
|
||||
"""
|
||||
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
|
||||
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
|
||||
|
||||
psi2 = psi2n.sum(axis=0)
|
||||
return psi0,psi1,psi2,psi2n
|
||||
|
|
@ -94,12 +97,13 @@ try:
|
|||
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
|
||||
l2 = np.square(lengthscale)
|
||||
log_denom1 = np.log(S/l2+1)
|
||||
log_denom2 = np.log(2*S/l2+1)
|
||||
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
|
||||
gamma, gamma1 = variational_posterior.gamma_probabilities()
|
||||
log_gamma = np.log(gamma)
|
||||
log_gamma1 = np.log(1.-gamma)
|
||||
variance = float(variance)
|
||||
|
||||
dvar = np.zeros(1)
|
||||
|
|
@ -113,6 +117,7 @@ try:
|
|||
from ....util.misc import param_to_array
|
||||
S = param_to_array(S)
|
||||
mu = param_to_array(mu)
|
||||
gamma = param_to_array(gamma)
|
||||
Z = param_to_array(Z)
|
||||
|
||||
support_code = """
|
||||
|
|
@ -130,7 +135,6 @@ try:
|
|||
double Zm1q = Z(m1,q);
|
||||
double Zm2q = Z(m2,q);
|
||||
double gnq = gamma(n,q);
|
||||
double g1nq = gamma1(n,q);
|
||||
double mu_nq = mu(n,q);
|
||||
|
||||
if(m2==0) {
|
||||
|
|
@ -156,7 +160,7 @@ try:
|
|||
|
||||
dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
|
||||
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
|
||||
dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
|
||||
dgamma(n,q) += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
|
||||
dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
|
||||
dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
|
||||
}
|
||||
|
|
@ -184,7 +188,7 @@ try:
|
|||
|
||||
dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
|
||||
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
|
||||
dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
|
||||
dgamma(n,q) += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
|
||||
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
|
||||
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
|
||||
}
|
||||
|
|
@ -192,7 +196,7 @@ try:
|
|||
}
|
||||
}
|
||||
"""
|
||||
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
|
||||
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
|
||||
|
||||
dl *= 2.*lengthscale
|
||||
if not ARD:
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@
|
|||
|
||||
|
||||
import numpy as np
|
||||
from stationary import Stationary
|
||||
from psi_comp import PSICOMP_RBF
|
||||
from psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
|
||||
from .stationary import Stationary
|
||||
from .psi_comp import PSICOMP_RBF
|
||||
from .psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
|
||||
from ...util.config import *
|
||||
|
||||
class RBF(Stationary):
|
||||
|
|
|
|||
|
|
@ -3,11 +3,11 @@ A new kernel
|
|||
"""
|
||||
|
||||
import numpy as np
|
||||
from kern import Kern,CombinationKernel
|
||||
from .kern import Kern,CombinationKernel
|
||||
from .independent_outputs import index_to_slices
|
||||
import itertools
|
||||
|
||||
class DiffGenomeKern(Kern):
|
||||
class DEtime(Kern):
|
||||
|
||||
def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'):
|
||||
self.idx_p = idx_p
|
||||
|
|
@ -104,7 +104,7 @@ class SplitKern(CombinationKernel):
|
|||
assert len(slices2)<=2, 'The Split kernel only support two different indices'
|
||||
target = np.zeros((X.shape[0], X2.shape[0]))
|
||||
# diagonal blocks
|
||||
[[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))]
|
||||
[[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in range(min(len(slices),len(slices2)))]
|
||||
if len(slices)>1:
|
||||
[target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[1], slices2[0])]
|
||||
if len(slices2)>1:
|
||||
|
|
@ -135,7 +135,7 @@ class SplitKern(CombinationKernel):
|
|||
else:
|
||||
assert dL_dK.shape==(X.shape[0],X2.shape[0])
|
||||
slices2 = index_to_slices(X2[:,self.index_dim])
|
||||
[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))]
|
||||
[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in range(min(len(slices),len(slices2)))]
|
||||
if len(slices)>1:
|
||||
[collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[1], slices2[0])]
|
||||
if len(slices2)>1:
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
import numpy as np
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
|
|
@ -60,7 +60,10 @@ class White(Static):
|
|||
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
if X2 is None:
|
||||
self.variance.gradient = np.trace(dL_dK)
|
||||
else:
|
||||
self.variance.gradient = 0.
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
self.variance.gradient = dL_dKdiag.sum()
|
||||
|
|
@ -106,7 +109,7 @@ class Fixed(Static):
|
|||
return self.variance * self.fixed_K
|
||||
|
||||
def Kdiag(self, X):
|
||||
return self.variance * self.fixed_K.diag()
|
||||
return self.variance * self.fixed_K.diagonal()
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
|
||||
|
|
|
|||
|
|
@ -2,16 +2,23 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
from ...util.linalg import tdot
|
||||
from ... import util
|
||||
import numpy as np
|
||||
from scipy import integrate, weave
|
||||
from ...util.config import config # for assesing whether to use weave
|
||||
from scipy import integrate
|
||||
from ...util.config import config # for assesing whether to use cython
|
||||
from ...util.caching import Cache_this
|
||||
|
||||
try:
|
||||
import stationary_cython
|
||||
except ImportError:
|
||||
print('warning: failed to import cython module: falling back to numpy')
|
||||
config.set('cython', 'working', 'false')
|
||||
|
||||
|
||||
class Stationary(Kern):
|
||||
"""
|
||||
Stationary kernels (covariance functions).
|
||||
|
|
@ -65,10 +72,10 @@ class Stationary(Kern):
|
|||
self.link_parameters(self.variance, self.lengthscale)
|
||||
|
||||
def K_of_r(self, r):
|
||||
raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
|
||||
raise NotImplementedError("implement the covariance function as a fn of r to use this class")
|
||||
|
||||
def dK_dr(self, r):
|
||||
raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class"
|
||||
raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
|
||||
|
||||
@Cache_this(limit=5, ignore_args=())
|
||||
def K(self, X, X2=None):
|
||||
|
|
@ -148,28 +155,18 @@ class Stationary(Kern):
|
|||
(dL_dK), compute the gradient wrt the parameters of this kernel,
|
||||
and store in the parameters object as e.g. self.variance.gradient
|
||||
"""
|
||||
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
|
||||
self.variance.gradient = np.sum(self.K(X, X2)* dL_dK)/self.variance
|
||||
|
||||
#now the lengthscale gradient(s)
|
||||
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
|
||||
if self.ARD:
|
||||
#rinv = self._inv_dis# this is rather high memory? Should we loop instead?t(X, X2)
|
||||
#d = X[:, None, :] - X2[None, :, :]
|
||||
#x_xl3 = np.square(d)
|
||||
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
|
||||
|
||||
tmp = dL_dr*self._inv_dist(X, X2)
|
||||
if X2 is None: X2 = X
|
||||
|
||||
|
||||
if config.getboolean('weave', 'working'):
|
||||
try:
|
||||
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
|
||||
except:
|
||||
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
|
||||
config.set('weave', 'working', 'False')
|
||||
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
|
||||
if config.getboolean('cython', 'working'):
|
||||
self.lengthscale.gradient = self._lengthscale_grads_cython(tmp, X, X2)
|
||||
else:
|
||||
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
|
||||
self.lengthscale.gradient = self._lengthscale_grads_pure(tmp, X, X2)
|
||||
else:
|
||||
r = self._scaled_dist(X, X2)
|
||||
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
|
||||
|
|
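For reference, both the Cython and pure-numpy branches above evaluate the same ARD lengthscale gradient; a sketch of the chain rule they implement (not part of the diff): with the scaled distance

    r_{nm} = \sqrt{\sum_q (x_{nq} - x'_{mq})^2 / \ell_q^2}, \qquad
    \frac{\partial r_{nm}}{\partial \ell_q} = -\frac{(x_{nq} - x'_{mq})^2}{\ell_q^3 \, r_{nm}},

the gradient of the objective is

    \frac{\partial L}{\partial \ell_q} = -\frac{1}{\ell_q^3} \sum_{n,m} \frac{\partial L}{\partial r_{nm}} \, \frac{(x_{nq} - x'_{mq})^2}{r_{nm}},

so tmp = dL_dr * self._inv_dist(X, X2) carries the dL/dr divided by r factor, and the helper (compiled or numpy) only has to accumulate the squared coordinate differences before the final division by lengthscale**3.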
@ -184,43 +181,27 @@ class Stationary(Kern):
|
|||
dist = self._scaled_dist(X, X2).copy()
|
||||
return 1./np.where(dist != 0., dist, np.inf)
|
||||
|
||||
def weave_lengthscale_grads(self, tmp, X, X2):
|
||||
"""Use scipy.weave to compute derivatives wrt the lengthscales"""
|
||||
def _lengthscale_grads_pure(self, tmp, X, X2):
|
||||
return -np.array([np.sum(tmp * np.square(X[:,q:q+1] - X2[:,q:q+1].T)) for q in range(self.input_dim)])/self.lengthscale**3
|
||||
|
||||
def _lengthscale_grads_cython(self, tmp, X, X2):
|
||||
N,M = tmp.shape
|
||||
Q = X.shape[1]
|
||||
if hasattr(X, 'values'):X = X.values
|
||||
if hasattr(X2, 'values'):X2 = X2.values
|
||||
Q = self.input_dim
|
||||
X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
|
||||
grads = np.zeros(self.input_dim)
|
||||
code = """
|
||||
double gradq;
|
||||
for(int q=0; q<Q; q++){
|
||||
gradq = 0;
|
||||
for(int n=0; n<N; n++){
|
||||
for(int m=0; m<M; m++){
|
||||
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
|
||||
}
|
||||
}
|
||||
grads(q) = gradq;
|
||||
}
|
||||
"""
|
||||
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
|
||||
stationary_cython.lengthscale_grads(N, M, Q, tmp, X, X2, grads)
|
||||
return -grads/self.lengthscale**3
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
"""
|
||||
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
|
||||
"""
|
||||
if config.getboolean('weave', 'working'):
|
||||
try:
|
||||
return self.gradients_X_weave(dL_dK, X, X2)
|
||||
except:
|
||||
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
|
||||
config.set('weave', 'working', 'False')
|
||||
return self.gradients_X_(dL_dK, X, X2)
|
||||
if config.getboolean('cython', 'working'):
|
||||
return self._gradients_X_cython(dL_dK, X, X2)
|
||||
else:
|
||||
return self.gradients_X_(dL_dK, X, X2)
|
||||
return self._gradients_X_pure(dL_dK, X, X2)
|
||||
|
||||
def gradients_X_(self, dL_dK, X, X2=None):
|
||||
def _gradients_X_pure(self, dL_dK, X, X2=None):
|
||||
invdist = self._inv_dist(X, X2)
|
||||
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
|
||||
tmp = invdist*dL_dr
|
||||
|
|
@ -230,54 +211,25 @@ class Stationary(Kern):
|
|||
|
||||
#The high-memory numpy way:
|
||||
#d = X[:, None, :] - X2[None, :, :]
|
||||
#ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
|
||||
#grad = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
|
||||
|
||||
#the lower memory way with a loop
|
||||
ret = np.empty(X.shape, dtype=np.float64)
|
||||
for q in xrange(self.input_dim):
|
||||
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])
|
||||
ret /= self.lengthscale**2
|
||||
grad = np.empty(X.shape, dtype=np.float64)
|
||||
for q in range(self.input_dim):
|
||||
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=grad[:,q])
|
||||
return grad/self.lengthscale**2
|
||||
|
||||
return ret
|
||||
|
||||
def gradients_X_weave(self, dL_dK, X, X2=None):
|
||||
def _gradients_X_cython(self, dL_dK, X, X2=None):
|
||||
invdist = self._inv_dist(X, X2)
|
||||
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
|
||||
tmp = invdist*dL_dr
|
||||
if X2 is None:
|
||||
tmp = tmp + tmp.T
|
||||
X2 = X
|
||||
|
||||
code = """
|
||||
int n,m,d;
|
||||
double retnd;
|
||||
#pragma omp parallel for private(n,d, retnd, m)
|
||||
for(d=0;d<D;d++){
|
||||
for(n=0;n<N;n++){
|
||||
retnd = 0.0;
|
||||
for(m=0;m<M;m++){
|
||||
retnd += tmp(n,m)*(X(n,d)-X2(m,d));
|
||||
}
|
||||
ret(n,d) = retnd;
|
||||
}
|
||||
}
|
||||
|
||||
"""
|
||||
if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
|
||||
if hasattr(X2, 'values'):X2 = X2.values
|
||||
ret = np.zeros(X.shape)
|
||||
N,D = X.shape
|
||||
N,M = tmp.shape
|
||||
from scipy import weave
|
||||
support_code = """
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
"""
|
||||
weave_options = {'headers' : ['<omp.h>'],
|
||||
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
|
||||
'extra_link_args' : ['-lgomp']}
|
||||
weave.inline(code, ['ret', 'N', 'D', 'M', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz, support_code=support_code, **weave_options)
|
||||
return ret/self.lengthscale**2
|
||||
X, X2 = np.ascontiguousarray(X), np.ascontiguousarray(X2)
|
||||
grad = np.zeros(X.shape)
|
||||
stationary_cython.grad_X(X.shape[0], X.shape[1], X2.shape[0], X, X2, tmp, grad)
|
||||
return grad/self.lengthscale**2
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
return np.zeros(X.shape)
|
||||
|
|
@ -285,6 +237,9 @@ class Stationary(Kern):
|
|||
def input_sensitivity(self, summarize=True):
|
||||
return self.variance*np.ones(self.input_dim)/self.lengthscale**2
|
||||
|
||||
|
||||
|
||||
|
||||
class Exponential(Stationary):
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
|
||||
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
|
|
@ -296,6 +251,8 @@ class Exponential(Stationary):
|
|||
return -0.5*self.K_of_r(r)
|
||||
|
||||
|
||||
|
||||
|
||||
class OU(Stationary):
|
||||
"""
|
||||
OU kernel:
|
||||
|
|
|
|||
6011
GPy/kern/_src/stationary_cython.c
Normal file
File diff suppressed because it is too large
36
GPy/kern/_src/stationary_cython.pyx
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#cython: boundscheck=False
|
||||
#cython: wraparound=False
|
||||
import numpy as np
|
||||
cimport numpy as np
|
||||
|
||||
ctypedef np.float64_t DTYPE_t
|
||||
|
||||
cdef extern from "stationary_utils.h":
|
||||
void _grad_X "_grad_X" (int N, int D, int M, double* X, double* X2, double* tmp, double* grad)
|
||||
|
||||
cdef extern from "stationary_utils.h":
|
||||
void _lengthscale_grads "_lengthscale_grads" (int N, int M, int Q, double* tmp, double* X, double* X2, double* grad)
|
||||
|
||||
def grad_X(int N, int D, int M,
|
||||
np.ndarray[DTYPE_t, ndim=2] _X,
|
||||
np.ndarray[DTYPE_t, ndim=2] _X2,
|
||||
np.ndarray[DTYPE_t, ndim=2] _tmp,
|
||||
np.ndarray[DTYPE_t, ndim=2] _grad):
|
||||
cdef double *X = <double*> _X.data
|
||||
cdef double *X2 = <double*> _X2.data
|
||||
cdef double *tmp = <double*> _tmp.data
|
||||
cdef double *grad = <double*> _grad.data
|
||||
_grad_X(N, D, M, X, X2, tmp, grad) # return nothing, work in place.
|
||||
|
||||
def lengthscale_grads(int N, int M, int Q,
|
||||
np.ndarray[DTYPE_t, ndim=2] _tmp,
|
||||
np.ndarray[DTYPE_t, ndim=2] _X,
|
||||
np.ndarray[DTYPE_t, ndim=2] _X2,
|
||||
np.ndarray[DTYPE_t, ndim=1] _grad):
|
||||
cdef double *tmp = <double*> _tmp.data
|
||||
cdef double *X = <double*> _X.data
|
||||
cdef double *X2 = <double*> _X2.data
|
||||
cdef double *grad = <double*> _grad.data
|
||||
_lengthscale_grads(N, M, Q, tmp, X, X2, grad) # return nothing, work in place.
|
||||
|
||||
|
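A minimal sketch of how these wrappers are driven from Python once the extension has been compiled (the import path and array sizes here are illustrative assumptions, not taken from the diff):

    import numpy as np
    from GPy.kern._src import stationary_cython  # assumes the built extension is importable

    N, M, Q = 5, 4, 3
    X = np.ascontiguousarray(np.random.randn(N, Q))     # inputs, one row per point
    X2 = np.ascontiguousarray(np.random.randn(M, Q))
    tmp = np.ascontiguousarray(np.random.randn(N, M))   # stands in for dL_dr * 1/r

    grad_l = np.zeros(Q)                                # output buffer, filled in place
    stationary_cython.lengthscale_grads(N, M, Q, tmp, X, X2, grad_l)

    grad_x = np.zeros((N, Q))                           # output buffer, filled in place
    stationary_cython.grad_X(N, Q, M, X, X2, tmp, grad_x)

Both calls return nothing and write into the pre-allocated, C-contiguous float64 output arrays, mirroring the "work in place" comments above; the scaling by the lengthscales is applied afterwards in stationary.py.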
||||
35
GPy/kern/_src/stationary_utils.c
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
void _grad_X(int N, int D, int M, double* X, double* X2, double* tmp, double* grad){
|
||||
int n,m,d;
|
||||
double retnd;
|
||||
//#pragma omp parallel for private(n,d, retnd, m)
|
||||
for(d=0;d<D;d++){
|
||||
for(n=0;n<N;n++){
|
||||
retnd = 0.0;
|
||||
for(m=0;m<M;m++){
|
||||
retnd += tmp[n*M+m]*(X[n*D+d]-X2[m*D+d]);
|
||||
}
|
||||
grad[n*D+d] = retnd;
|
||||
}
|
||||
}
|
||||
} //grad_X
|
||||
|
||||
|
||||
void _lengthscale_grads(int N, int M, int Q, double* tmp, double* X, double* X2, double* grad){
|
||||
int n,m,q;
|
||||
double gradq, dist;
|
||||
#pragma omp parallel for private(n,m, gradq, dist)
|
||||
for(q=0; q<Q; q++){
|
||||
gradq = 0;
|
||||
for(n=0; n<N; n++){
|
||||
for(m=0; m<M; m++){
|
||||
dist = X[n*Q+q]-X2[m*Q+q];
|
||||
gradq += tmp[n*M+m]*dist*dist;
|
||||
}
|
||||
}
|
||||
grad[q] = gradq;
|
||||
}
|
||||
} //lengthscale_grads
|
||||
|
||||
|
||||
|
||||
|
||||
3
GPy/kern/_src/stationary_utils.h
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
#include <omp.h>
|
||||
void _grad_X(int N, int D, int M, double*X, double* X2, double* tmp, double* grad);
|
||||
void _lengthscale_grads(int N, int D, int M, double* X, double* X2, double* tmp, double* grad);
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
# Check Matthew Rocklin's blog post.
|
||||
import sympy as sym
|
||||
import numpy as np
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.symbolic import Symbolic_core
|
||||
|
||||
|
||||
|
|
@ -11,7 +11,7 @@ class Symbolic(Kern, Symbolic_core):
|
|||
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]):
|
||||
|
||||
if k is None:
|
||||
raise ValueError, "You must provide an argument for the covariance function."
|
||||
raise ValueError("You must provide an argument for the covariance function.")
|
||||
|
||||
Kern.__init__(self, input_dim, active_dims, name=name)
|
||||
kdiag = k
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
|
||||
import numpy as np
|
||||
from kern import Kern
|
||||
from .kern import Kern
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
from ...util.caching import Cache_this
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
from bernoulli import Bernoulli
|
||||
from exponential import Exponential
|
||||
from gaussian import Gaussian
|
||||
from gamma import Gamma
|
||||
from poisson import Poisson
|
||||
from student_t import StudentT
|
||||
from likelihood import Likelihood
|
||||
from mixed_noise import MixedNoise
|
||||
from binomial import Binomial
|
||||
from .bernoulli import Bernoulli
|
||||
from .exponential import Exponential
|
||||
from .gaussian import Gaussian
|
||||
from .gamma import Gamma
|
||||
from .poisson import Poisson
|
||||
from .student_t import StudentT
|
||||
from .likelihood import Likelihood
|
||||
from .mixed_noise import MixedNoise
|
||||
from .binomial import Binomial
|
||||
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@
|
|||
|
||||
import numpy as np
|
||||
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from scipy import stats
|
||||
from . import link_functions
|
||||
from .likelihood import Likelihood
|
||||
|
||||
class Bernoulli(Likelihood):
|
||||
"""
|
||||
|
|
@ -77,23 +76,22 @@ class Bernoulli(Likelihood):
|
|||
|
||||
return Z_hat, mu_hat, sigma2_hat
|
||||
|
||||
def variational_expectations(self, Y, m, v, gh_points=None):
|
||||
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||
if isinstance(self.gp_link, link_functions.Probit):
|
||||
|
||||
if gh_points is None:
|
||||
gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
|
||||
gh_x, gh_w = self._gh_points()
|
||||
else:
|
||||
gh_x, gh_w = gh_points
|
||||
|
||||
from scipy import stats
|
||||
|
||||
shape = m.shape
|
||||
m,v,Y = m.flatten(), v.flatten(), Y.flatten()
|
||||
Ysign = np.where(Y==1,1,-1)
|
||||
X = gh_x[None,:]*np.sqrt(2.*v[:,None]) + (m*Ysign)[:,None]
|
||||
p = stats.norm.cdf(X)
|
||||
p = std_norm_cdf(X)
|
||||
p = np.clip(p, 1e-9, 1.-1e-9) # for numerical stability
|
||||
N = stats.norm.pdf(X)
|
||||
N = std_norm_pdf(X)
|
||||
F = np.log(p).dot(gh_w)
|
||||
NoverP = N/p
|
||||
dF_dm = (NoverP*Ysign[:,None]).dot(gh_w)
|
||||
|
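A note on the probit branch above (a sketch of the identity it relies on, not part of the diff): the Bernoulli/probit likelihood satisfies p(y|f) = \Phi(f)^y (1-\Phi(f))^{1-y} = \Phi(\hat{y} f) with \hat{y} = 2y - 1, which is exactly the Ysign mapping, so the variational expectation reduces to a one-dimensional Gaussian integral of log \Phi evaluated on the shifted and scaled Gauss-Hermite grid gh_x*sqrt(2v) + m*Ysign.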
|
@ -106,10 +104,10 @@ class Bernoulli(Likelihood):
|
|||
def predictive_mean(self, mu, variance, Y_metadata=None):
|
||||
|
||||
if isinstance(self.gp_link, link_functions.Probit):
|
||||
return stats.norm.cdf(mu/np.sqrt(1+variance))
|
||||
return std_norm_cdf(mu/np.sqrt(1+variance))
|
||||
|
||||
elif isinstance(self.gp_link, link_functions.Heaviside):
|
||||
return stats.norm.cdf(mu/np.sqrt(variance))
|
||||
return std_norm_cdf(mu/np.sqrt(variance))
|
||||
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
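The closed forms used above follow from the standard Gaussian-probit integral (stated here for reference):

    \int \Phi(f)\,\mathcal{N}(f\,|\,\mu, v)\,df = \Phi\!\left(\frac{\mu}{\sqrt{1+v}}\right),

which gives the probit case directly; for the Heaviside link the integral is just P(f > 0) under N(\mu, v), i.e. \Phi(\mu/\sqrt{v}), which is why the +1 disappears from the denominator.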
|
|
|||
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
import numpy as np
|
||||
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from . import link_functions
|
||||
from .likelihood import Likelihood
|
||||
from scipy import special
|
||||
|
||||
class Binomial(Likelihood):
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
import numpy as np
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from . import link_functions
|
||||
from .likelihood import Likelihood
|
||||
|
||||
class Exponential(Likelihood):
|
||||
"""
|
||||
|
|
@ -57,9 +57,8 @@ class Exponential(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
log_objective = np.log(link_f) - y*link_f
|
||||
return np.sum(log_objective)
|
||||
return log_objective
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -77,7 +76,6 @@ class Exponential(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
grad = 1./link_f - y
|
||||
#grad = y/(link_f**2) - 1./link_f
|
||||
return grad
|
||||
|
|
@ -103,7 +101,6 @@ class Exponential(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
hess = -1./(link_f**2)
|
||||
#hess = -2*y/(link_f**3) + 1/(link_f**2)
|
||||
return hess
|
||||
|
|
@ -123,7 +120,6 @@ class Exponential(Likelihood):
|
|||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
d3lik_dlink3 = 2./(link_f**3)
|
||||
#d3lik_dlink3 = 6*y/(link_f**4) - 2./(link_f**3)
|
||||
return d3lik_dlink3
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ import numpy as np
|
|||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
from ..core.parameterization import Param
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from . import link_functions
|
||||
from .likelihood import Likelihood
|
||||
|
||||
class Gamma(Likelihood):
|
||||
"""
|
||||
|
|
@ -66,12 +66,11 @@ class Gamma(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
#alpha = self.gp_link.transf(gp)*self.beta
|
||||
#return (1. - alpha)*np.log(obs) + self.beta*obs - alpha * np.log(self.beta) + np.log(special.gamma(alpha))
|
||||
alpha = link_f*self.beta
|
||||
log_objective = alpha*np.log(self.beta) - np.log(special.gamma(alpha)) + (alpha - 1)*np.log(y) - self.beta*y
|
||||
return np.sum(log_objective)
|
||||
return log_objective
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -90,7 +89,6 @@ class Gamma(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
grad = self.beta*np.log(self.beta*y) - special.psi(self.beta*link_f)*self.beta
|
||||
#old
|
||||
#return -self.gp_link.dtransf_df(gp)*self.beta*np.log(obs) + special.psi(self.gp_link.transf(gp)*self.beta) * self.gp_link.dtransf_df(gp)*self.beta
|
||||
|
|
@ -118,7 +116,6 @@ class Gamma(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
hess = -special.polygamma(1, self.beta*link_f)*(self.beta**2)
|
||||
#old
|
||||
#return -self.gp_link.d2transf_df2(gp)*self.beta*np.log(obs) + special.polygamma(1,self.gp_link.transf(gp)*self.beta)*(self.gp_link.dtransf_df(gp)*self.beta)**2 + special.psi(self.gp_link.transf(gp)*self.beta)*self.gp_link.d2transf_df2(gp)*self.beta
|
||||
|
|
@ -140,6 +137,5 @@ class Gamma(Likelihood):
|
|||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
d3lik_dlink3 = -special.polygamma(2, self.beta*link_f)*(self.beta**3)
|
||||
return d3lik_dlink3
|
||||
|
|
|
|||
|
|
@ -13,8 +13,8 @@ James 11/12/13
|
|||
|
||||
import numpy as np
|
||||
from scipy import stats, special
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from . import link_functions
|
||||
from .likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
from scipy import stats
|
||||
|
|
@ -34,7 +34,9 @@ class Gaussian(Likelihood):
|
|||
if gp_link is None:
|
||||
gp_link = link_functions.Identity()
|
||||
|
||||
assert isinstance(gp_link, link_functions.Identity), "the likelihood only implemented for the identity link"
|
||||
if not isinstance(gp_link, link_functions.Identity):
|
||||
print("Warning, Exact inference is not implemeted for non-identity link functions,\
|
||||
if you are not already, ensure Laplace inference_method is used")
|
||||
|
||||
super(Gaussian, self).__init__(gp_link, name=name)
|
||||
|
||||
|
|
@ -130,11 +132,8 @@ class Gaussian(Likelihood):
|
|||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
ln_det_cov = N*np.log(self.variance)
|
||||
|
||||
return -0.5*(np.sum((y-link_f)**2/self.variance) + ln_det_cov + N*np.log(2.*np.pi))
|
||||
ln_det_cov = np.log(self.variance)
|
||||
return -(1.0/(2*self.variance))*((y-link_f)**2) - 0.5*ln_det_cov - 0.5*np.log(2.*np.pi)
|
||||
|
||||
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
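For reference, the rewritten logpdf_link above returns the per-point Gaussian log density rather than its sum over the data,

    \log \mathcal{N}(y_i\,|\,f_i, \sigma^2) = -\tfrac{1}{2}\log(2\pi\sigma^2) - \frac{(y_i - f_i)^2}{2\sigma^2},

leaving any summation to the caller (e.g. the logpdf_sum convenience added in likelihood.py), which keeps the shapes consistent with the other per-point derivative methods.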
|
@ -151,8 +150,7 @@ class Gaussian(Likelihood):
|
|||
:returns: gradient of log likelihood evaluated at points link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s2_i = (1.0/self.variance)
|
||||
s2_i = 1.0/self.variance
|
||||
grad = s2_i*y - s2_i*link_f
|
||||
return grad
|
||||
|
||||
|
|
@ -178,9 +176,9 @@ class Gaussian(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
hess = -(1.0/self.variance)*np.ones((N, 1))
|
||||
D = link_f.shape[1]
|
||||
hess = -(1.0/self.variance)*np.ones((N, D))
|
||||
return hess
|
||||
|
||||
def d3logpdf_dlink3(self, link_f, y, Y_metadata=None):
|
||||
|
|
@ -198,9 +196,9 @@ class Gaussian(Likelihood):
|
|||
:returns: third derivative of log likelihood evaluated at points link(f)
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
N = y.shape[0]
|
||||
d3logpdf_dlink3 = np.zeros((N,1))
|
||||
D = link_f.shape[1]
|
||||
d3logpdf_dlink3 = np.zeros((N,D))
|
||||
return d3logpdf_dlink3
|
||||
|
||||
def dlogpdf_link_dvar(self, link_f, y, Y_metadata=None):
|
||||
|
|
@ -218,12 +216,10 @@ class Gaussian(Likelihood):
|
|||
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
e = y - link_f
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
N = y.shape[0]
|
||||
dlik_dsigma = -0.5*N/self.variance + 0.5*s_4*np.sum(np.square(e))
|
||||
return np.sum(dlik_dsigma) # Sure about this sum?
|
||||
dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e)
|
||||
return dlik_dsigma
|
||||
|
||||
def dlogpdf_dlink_dvar(self, link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -240,7 +236,6 @@ class Gaussian(Likelihood):
|
|||
:returns: derivative of log likelihood evaluated at points link(f) w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
dlik_grad_dsigma = -s_4*y + s_4*link_f
|
||||
return dlik_grad_dsigma
|
||||
|
|
@ -260,23 +255,26 @@ class Gaussian(Likelihood):
|
|||
:returns: derivative of log hessian evaluated at points link(f_i) and link(f_j) w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.asarray(link_f).shape == np.asarray(y).shape
|
||||
s_4 = 1.0/(self.variance**2)
|
||||
N = y.shape[0]
|
||||
d2logpdf_dlink2_dvar = np.ones((N,1))*s_4
|
||||
D = link_f.shape[1]
|
||||
d2logpdf_dlink2_dvar = np.ones((N, D))*s_4
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return np.asarray([[dlogpdf_dvar]])
|
||||
dlogpdf_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
|
||||
dlogpdf_dtheta[0,:,:] = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return dlogpdf_dtheta
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return dlogpdf_dlink_dvar
|
||||
dlogpdf_dlink_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
|
||||
dlogpdf_dlink_dtheta[0, :, :]= self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return dlogpdf_dlink_dtheta
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
|
||||
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return d2logpdf_dlink2_dvar
|
||||
d2logpdf_dlink2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
|
||||
d2logpdf_dlink2_dtheta[0, :, :] = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
|
||||
return d2logpdf_dlink2_dtheta
|
||||
|
||||
def _mean(self, gp):
|
||||
"""
|
||||
|
|
@ -309,18 +307,17 @@ class Gaussian(Likelihood):
|
|||
Ysim = np.array([np.random.normal(self.gp_link.transf(gpj), scale=np.sqrt(self.variance), size=1) for gpj in gp])
|
||||
return Ysim.reshape(orig_shape)
|
||||
|
||||
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||
def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
|
||||
"""
|
||||
assumes independence
|
||||
"""
|
||||
v = var_star + self.variance
|
||||
return -0.5*np.log(2*np.pi) -0.5*np.log(v) - 0.5*np.square(y_test - mu_star)/v
|
||||
|
||||
def variational_expectations(self, Y, m, v, gh_points=None):
|
||||
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||
lik_var = float(self.variance)
|
||||
F = -0.5*np.log(2*np.pi) -0.5*np.log(lik_var) - 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/lik_var
|
||||
dF_dmu = (Y - m)/lik_var
|
||||
dF_dv = np.ones_like(v)*(-0.5/lik_var)
|
||||
dF_dlik_var = np.sum(-0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2))
|
||||
dF_dtheta = [dF_dlik_var]
|
||||
return F, dF_dmu, dF_dv, dF_dtheta
|
||||
dF_dtheta = -0.5/lik_var + 0.5*(np.square(Y) + np.square(m) + v - 2*m*Y)/(lik_var**2)
|
||||
return F, dF_dmu, dF_dv, dF_dtheta.reshape(1, Y.shape[0], Y.shape[1])
|
||||
|
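The closed form used above is worth stating (a sketch, for reference): under q(f) = N(m, v) the Gaussian likelihood has an analytic variational expectation,

    \mathbb{E}_{q}\big[\log \mathcal{N}(y\,|\,f, \sigma^2)\big]
      = -\tfrac{1}{2}\log(2\pi\sigma^2) - \frac{(y-m)^2 + v}{2\sigma^2},

and expanding (y-m)^2 + v gives the y^2 + m^2 + v - 2my grouping used for F and for the variance gradient; the rewritten dF_dtheta keeps this gradient per data point (reshaped to 1 x N x D) instead of summing it.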
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||
# Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
import link_functions
|
||||
from ..util.misc import chain_1, chain_2, chain_3
|
||||
from . import link_functions
|
||||
from ..util.misc import chain_1, chain_2, chain_3, blockify_dhess_dtheta, blockify_third, blockify_hessian, safe_exp
|
||||
from scipy.integrate import quad
|
||||
import warnings
|
||||
from ..core.parameterization import Parameterized
|
||||
|
|
@ -39,6 +39,15 @@ class Likelihood(Parameterized):
|
|||
assert isinstance(gp_link,link_functions.GPTransformation), "gp_link is not a valid GPTransformation."
|
||||
self.gp_link = gp_link
|
||||
self.log_concave = False
|
||||
self.not_block_really = False
|
||||
|
||||
def request_num_latent_functions(self, Y):
|
||||
"""
|
||||
The likelihood should infer how many latent functions are needed for the likelihood
|
||||
|
||||
Default is the number of outputs
|
||||
"""
|
||||
return Y.shape[1]
|
||||
|
||||
def _gradients(self,partial):
|
||||
return np.zeros(0)
|
||||
|
|
@ -69,7 +78,7 @@ class Likelihood(Parameterized):
|
|||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def log_predictive_density(self, y_test, mu_star, var_star):
|
||||
def log_predictive_density(self, y_test, mu_star, var_star, Y_metadata=None):
|
||||
"""
|
||||
Calculation of the log predictive density
|
||||
|
||||
|
|
@ -86,17 +95,87 @@ class Likelihood(Parameterized):
|
|||
assert y_test.shape==mu_star.shape
|
||||
assert y_test.shape==var_star.shape
|
||||
assert y_test.shape[1] == 1
|
||||
def integral_generator(y, m, v):
|
||||
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
|
||||
def f(f_star):
|
||||
return self.pdf(f_star, y)*np.exp(-(1./(2*v))*np.square(m-f_star))
|
||||
|
||||
flat_y_test = y_test.flatten()
|
||||
flat_mu_star = mu_star.flatten()
|
||||
flat_var_star = var_star.flatten()
|
||||
|
||||
if Y_metadata is not None:
|
||||
#Need to zip individual elements of Y_metadata as well
|
||||
Y_metadata_flat = {}
|
||||
if Y_metadata is not None:
|
||||
for key, val in Y_metadata.items():
|
||||
Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1,1)
|
||||
|
||||
zipped_values = []
|
||||
|
||||
for i in range(y_test.shape[0]):
|
||||
y_m = {}
|
||||
for key, val in Y_metadata_flat.items():
|
||||
if np.isscalar(val) or val.shape[0] == 1:
|
||||
y_m[key] = val
|
||||
else:
|
||||
#Won't broadcast yet
|
||||
y_m[key] = val[i]
|
||||
zipped_values.append((flat_y_test[i], flat_mu_star[i], flat_var_star[i], y_m))
|
||||
else:
|
||||
#Otherwise just pass along None's
|
||||
zipped_values = zip(flat_y_test, flat_mu_star, flat_var_star, [None]*y_test.shape[0])
|
||||
|
||||
def integral_generator(yi, mi, vi, yi_m):
|
||||
"""Generate a function which can be integrated
|
||||
to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
|
||||
def f(fi_star):
|
||||
#exponent = np.exp(-(1./(2*vi))*np.square(mi-fi_star))
|
||||
#from GPy.util.misc import safe_exp
|
||||
#exponent = safe_exp(exponent)
|
||||
#res = safe_exp(self.logpdf(fi_star, yi, yi_m))*exponent
|
||||
|
||||
#More stable in the log space
|
||||
res = np.exp(self.logpdf(fi_star, yi, yi_m)
|
||||
- 0.5*np.log(2*np.pi*vi)
|
||||
- 0.5*np.square(fi_star-mi)/vi)
|
||||
if not np.isfinite(res):
|
||||
import ipdb; ipdb.set_trace() # XXX BREAKPOINT
|
||||
return res
|
||||
|
||||
return f
|
||||
|
||||
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v), -np.inf, np.inf) for y, m, v in zip(y_test.flatten(), mu_star.flatten(), var_star.flatten())])
|
||||
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
|
||||
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
|
||||
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
|
||||
for yi, mi, vi, yi_m in zipped_values])
|
||||
p_ystar = np.array(p_ystar).reshape(-1, 1)
|
||||
return np.log(p_ystar)
|
||||
|
||||
def log_predictive_density_sampling(self, y_test, mu_star, var_star, Y_metadata=None, num_samples=1000):
|
||||
"""
|
||||
Calculation of the log predictive density via sampling
|
||||
|
||||
.. math:
|
||||
log p(y_{*}|D) = log 1/num_samples prod^{S}_{s=1} p(y_{*}|f_{*s})
|
||||
f_{*s} ~ p(f_{*}|\mu_{*}\\sigma^{2}_{*})
|
||||
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
:param mu_star: predictive mean of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type mu_star: (Nx1) array
|
||||
:param var_star: predictive variance of gaussian p(f_{*}|mu_{*}, var_{*})
|
||||
:type var_star: (Nx1) array
|
||||
:param num_samples: num samples of p(f_{*}|mu_{*}, var_{*}) to take
|
||||
:type num_samples: int
|
||||
"""
|
||||
assert y_test.shape==mu_star.shape
|
||||
assert y_test.shape==var_star.shape
|
||||
assert y_test.shape[1] == 1
|
||||
|
||||
#Take samples of p(f*|y)
|
||||
#fi_samples = np.random.randn(num_samples)*np.sqrt(var_star) + mu_star
|
||||
fi_samples = np.random.normal(mu_star, np.sqrt(var_star), size=(mu_star.shape[0], num_samples))
|
||||
|
||||
from scipy.misc import logsumexp
|
||||
log_p_ystar = -np.log(num_samples) + logsumexp(self.logpdf(fi_samples, y_test, Y_metadata=Y_metadata), axis=1)
|
||||
return log_p_ystar
|
||||
|
||||
|
||||
def _moments_match_ep(self,obs,tau,v):
|
||||
"""
|
||||
Calculation of moments using quadrature
|
||||
|
|
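Both predictive-density routines above target the same quantity; for reference,

    p(y_*\,|\,D) = \int p(y_*\,|\,f_*)\,\mathcal{N}(f_*\,|\,\mu_*, \sigma^2_*)\,df_*,

which log_predictive_density evaluates with scipy.integrate.quad in the log-stable form shown, while log_predictive_density_sampling uses the Monte Carlo estimate

    \log p(y_*\,|\,D) \approx \log \frac{1}{S} \sum_{s=1}^{S} p(y_*\,|\,f_{*s}), \qquad f_{*s} \sim \mathcal{N}(\mu_*, \sigma^2_*),

implemented via logsumexp so that small densities do not underflow.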
@ -131,6 +210,13 @@ class Likelihood(Parameterized):
|
|||
|
||||
return z, mean, variance
|
||||
|
||||
#only compute gh points if required
|
||||
__gh_points = None
|
||||
def _gh_points(self, T=20):
|
||||
if self.__gh_points is None:
|
||||
self.__gh_points = np.polynomial.hermite.hermgauss(T)
|
||||
return self.__gh_points
|
||||
|
||||
def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
|
||||
"""
|
||||
Use Gauss-Hermite Quadrature to compute
|
||||
|
|
@ -143,10 +229,9 @@ class Likelihood(Parameterized):
|
|||
|
||||
if no gh_points are passed, we construct them using default options
|
||||
"""
|
||||
#May be broken
|
||||
|
||||
if gh_points is None:
|
||||
gh_x, gh_w = np.polynomial.hermite.hermgauss(20)
|
||||
gh_x, gh_w = self._gh_points()
|
||||
else:
|
||||
gh_x, gh_w = gh_points
|
||||
|
||||
|
|
@ -168,15 +253,22 @@ class Likelihood(Parameterized):
|
|||
#d2logp_dx2 = np.clip(d2logp_dx2,-1e9,1e9)
|
||||
|
||||
#average over the grid to get derivatives of the Gaussian's parameters
|
||||
F = np.dot(logp, gh_w)
|
||||
dF_dm = np.dot(dlogp_dx, gh_w)
|
||||
dF_dv = np.dot(d2logp_dx2, gh_w)/2.
|
||||
#division by pi comes from fact that for each quadrature we need to scale by 1/sqrt(pi)
|
||||
F = np.dot(logp, gh_w)/np.sqrt(np.pi)
|
||||
dF_dm = np.dot(dlogp_dx, gh_w)/np.sqrt(np.pi)
|
||||
dF_dv = np.dot(d2logp_dx2, gh_w)/np.sqrt(np.pi)
|
||||
dF_dv /= 2.
|
||||
|
||||
if np.any(np.isnan(dF_dv)) or np.any(np.isinf(dF_dv)):
|
||||
stop
|
||||
if np.any(np.isnan(dF_dm)) or np.any(np.isinf(dF_dm)):
|
||||
stop
|
||||
|
||||
if self.size:
|
||||
dF_dtheta = self.dlogpdf_dtheta(X, Y[:,None]) # Ntheta x (orig size) x N_{quad_points}
|
||||
dF_dtheta = np.dot(dF_dtheta, gh_w)
|
||||
dF_dtheta = dF_dtheta.reshape(self.size, shape[0], shape[1])
|
||||
else:
|
||||
dF_dtheta = None # Not yet implemented
|
||||
return F.reshape(*shape), dF_dm.reshape(*shape), dF_dv.reshape(*shape), dF_dtheta
|
||||
|
||||
|
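The "division by pi" comment above refers to the usual Gauss-Hermite change of variables (stated here for reference): with nodes x_i and weights w_i,

    \int g(f)\,\mathcal{N}(f\,|\,m, v)\,df
      = \frac{1}{\sqrt{\pi}} \int g\big(m + \sqrt{2v}\,t\big)\,e^{-t^2}\,dt
      \approx \frac{1}{\sqrt{\pi}} \sum_i w_i\, g\big(m + \sqrt{2v}\,x_i\big),

so every quadrature sum against gh_w has to be scaled by 1/sqrt(pi), which is exactly the correction applied to F, dF_dm and dF_dv.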
|
@ -189,28 +281,35 @@ class Likelihood(Parameterized):
|
|||
|
||||
"""
|
||||
#conditional_mean: the expected value of y given some f, under this likelihood
|
||||
fmin = -np.inf
|
||||
fmax = np.inf
|
||||
def int_mean(f,m,v):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
exponent = -(0.5/v)*np.square(f - m)
|
||||
#If exponent is under -30 then exp(exponent) will be very small, so don't exp it!)
|
||||
#If p is zero then conditional_mean will overflow
|
||||
assert v.all() > 0
|
||||
p = safe_exp(exponent)
|
||||
|
||||
#If p is zero then conditional_variance will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_mean(f)*p
|
||||
scaled_mean = [quad(int_mean, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
scaled_mean = [quad(int_mean, fmin, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
mean = np.array(scaled_mean)[:,None] / np.sqrt(2*np.pi*(variance))
|
||||
|
||||
return mean
|
||||
|
||||
def _conditional_mean(self, f):
|
||||
"""Quadrature calculation of the conditional mean: E(Y_star|f)"""
|
||||
raise NotImplementedError, "implement this function to make predictions"
|
||||
raise NotImplementedError("implement this function to make predictions")
|
||||
|
||||
def predictive_variance(self, mu,variance, predictive_mean=None, Y_metadata=None):
|
||||
"""
|
||||
Approximation to the predictive variance: V(Y_star)
|
||||
|
||||
The following variance decomposition is used:
|
||||
V(Y_star) = E( V(Y_star|f_star) ) + V( E(Y_star|f_star) )
|
||||
V(Y_star) = E( V(Y_star|f_star)**2 ) + V( E(Y_star|f_star) )**2
|
||||
|
||||
:param mu: mean of posterior
|
||||
:param sigma: standard deviation of posterior
|
||||
|
|
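The decomposition the docstring describes is the law of total variance (in its standard form, without the squares):

    \mathbb{V}[Y_*] = \mathbb{E}_{f_*}\!\big[\mathbb{V}[Y_*\,|\,f_*]\big] + \mathbb{V}_{f_*}\!\big[\mathbb{E}[Y_*\,|\,f_*]\big],

with the outer moments taken under the Gaussian posterior N(f_* | mu, variance); the quadrature calls below evaluate E[V(Y_*|f_*)] and E[E(Y_*|f_*)^2], from which the second term follows as E[E(Y_*|f_*)^2] - E[E(Y_*|f_*)]^2.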
@ -220,15 +319,22 @@ class Likelihood(Parameterized):
|
|||
#sigma2 = sigma**2
|
||||
normalizer = np.sqrt(2*np.pi*variance)
|
||||
|
||||
fmin_v = -np.inf
|
||||
fmin_m = np.inf
|
||||
fmin = -np.inf
|
||||
fmax = np.inf
|
||||
|
||||
from ..util.misc import safe_exp
|
||||
# E( V(Y_star|f_star) )
|
||||
def int_var(f,m,v):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
exponent = -(0.5/v)*np.square(f - m)
|
||||
p = safe_exp(exponent)
|
||||
#If p is zero then conditional_variance will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_variance(f)*p
|
||||
scaled_exp_variance = [quad(int_var, -np.inf, np.inf,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
scaled_exp_variance = [quad(int_var, fmin_v, fmax,args=(mj,s2j))[0] for mj,s2j in zip(mu,variance)]
|
||||
exp_var = np.array(scaled_exp_variance)[:,None] / normalizer
|
||||
|
||||
#V( E(Y_star|f_star) ) = E( E(Y_star|f_star)**2 ) - E( E(Y_star|f_star) )**2
|
||||
|
|
@ -240,14 +346,15 @@ class Likelihood(Parameterized):
|
|||
|
||||
#E( E(Y_star|f_star)**2 )
|
||||
def int_pred_mean_sq(f,m,v,predictive_mean_sq):
|
||||
p = np.exp(-(0.5/v)*np.square(f - m))
|
||||
exponent = -(0.5/v)*np.square(f - m)
|
||||
p = np.exp(exponent)
|
||||
#If p is zero then conditional_mean**2 will overflow
|
||||
if p < 1e-10:
|
||||
return 0.
|
||||
else:
|
||||
return self.conditional_mean(f)**2*p
|
||||
|
||||
scaled_exp_exp2 = [quad(int_pred_mean_sq, -np.inf, np.inf,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
|
||||
scaled_exp_exp2 = [quad(int_pred_mean_sq, fmin_m, fmax,args=(mj,s2j,pm2j))[0] for mj,s2j,pm2j in zip(mu,variance,predictive_mean_sq)]
|
||||
exp_exp2 = np.array(scaled_exp_exp2)[:,None] / normalizer
|
||||
|
||||
var_exp = exp_exp2 - predictive_mean_sq
|
||||
|
|
@ -295,9 +402,19 @@ class Likelihood(Parameterized):
|
|||
:returns: likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.pdf_link(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.pdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
def logpdf_sum(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Convenience function that can overridden for functions where this could
|
||||
be computed more efficiently
|
||||
"""
|
||||
return np.sum(self.logpdf(f, y, Y_metadata=Y_metadata))
|
||||
|
||||
def logpdf(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the log likelihood (log pdf) using it
|
||||
|
|
@ -313,6 +430,9 @@ class Likelihood(Parameterized):
|
|||
:returns: log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.logpdf_link(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.logpdf_link(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
|
||||
|
|
@ -332,11 +452,15 @@ class Likelihood(Parameterized):
|
|||
:returns: derivative of log likelihood evaluated for this point
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.dlogpdf_dlink(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
return chain_1(dlogpdf_dlink, dlink_df)
|
||||
|
||||
@blockify_hessian
|
||||
def d2logpdf_df2(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the second derivative of log likelihood using it
|
||||
|
|
@ -353,13 +477,18 @@ class Likelihood(Parameterized):
|
|||
:returns: second derivative of log likelihood evaluated for this point (diagonal only)
|
||||
:rtype: 1xN array
|
||||
"""
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
d2logpdf_df2 = self.d2logpdf_dlink2(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d2logpdf_dlink2 = self.d2logpdf_dlink2(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
return chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||
d2logpdf_df2 = chain_2(d2logpdf_dlink2, dlink_df, dlogpdf_dlink, d2link_df2)
|
||||
return d2logpdf_df2
|
||||
|
||||
@blockify_third
|
||||
def d3logpdf_df3(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
Evaluates the link function link(f) then computes the third derivative of log likelihood using it
|
||||
|
|
@ -376,6 +505,9 @@ class Likelihood(Parameterized):
|
|||
:returns: third derivative of log likelihood evaluated for this point
|
||||
:rtype: float
|
||||
"""
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
d3logpdf_df3 = self.d3logpdf_dlink3(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
d3logpdf_dlink3 = self.d3logpdf_dlink3(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
|
|
@ -383,46 +515,75 @@ class Likelihood(Parameterized):
|
|||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
dlogpdf_dlink = self.dlogpdf_dlink(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
d3link_df3 = self.gp_link.d3transf_df3(f)
|
||||
return chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||
d3logpdf_df3 = chain_3(d3logpdf_dlink3, dlink_df, d2logpdf_dlink2, d2link_df2, dlogpdf_dlink, d3link_df3)
|
||||
return d3logpdf_df3
|
||||
|
||||
|
||||
def dlogpdf_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
if self.not_block_really:
|
||||
raise NotImplementedError("Need to make a decorator for this!")
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.dlogpdf_link_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
return self.dlogpdf_link_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([1, 0])
|
||||
return np.zeros((0, f.shape[0], f.shape[1]))
|
||||
|
||||
def dlogpdf_df_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
if self.not_block_really:
|
||||
raise NotImplementedError("Need to make a decorator for this!")
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.dlogpdf_dlink_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
|
||||
dlogpdf_df_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
|
||||
#Chain each parameter of the likelihood separately
|
||||
for p in range(self.size):
|
||||
dlogpdf_df_dtheta[p, :, :] = chain_1(dlogpdf_dlink_dtheta[p,:,:], dlink_df)
|
||||
return dlogpdf_df_dtheta
|
||||
#return chain_1(dlogpdf_dlink_dtheta, dlink_df)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
return np.zeros((0, f.shape[0], f.shape[1]))
|
||||
|
||||
def d2logpdf_df2_dtheta(self, f, y, Y_metadata=None):
|
||||
"""
|
||||
TODO: Doc strings
|
||||
"""
|
||||
if self.size > 0:
|
||||
if self.not_block_really:
|
||||
raise NotImplementedError("Need to make a decorator for this!")
|
||||
if isinstance(self.gp_link, link_functions.Identity):
|
||||
return self.d2logpdf_dlink2_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
else:
|
||||
inv_link_f = self.gp_link.transf(f)
|
||||
dlink_df = self.gp_link.dtransf_df(f)
|
||||
d2link_df2 = self.gp_link.d2transf_df2(f)
|
||||
d2logpdf_dlink2_dtheta = self.d2logpdf_dlink2_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dtheta = self.dlogpdf_dlink_dtheta(inv_link_f, y, Y_metadata=Y_metadata)
|
||||
return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
|
||||
d2logpdf_df2_dtheta = np.zeros((self.size, f.shape[0], f.shape[1]))
|
||||
#Chain each parameter of the likelihood separately
|
||||
for p in range(self.size):
|
||||
d2logpdf_df2_dtheta[p, :, :] = chain_2(d2logpdf_dlink2_dtheta[p,:,:], dlink_df, dlogpdf_dlink_dtheta[p,:,:], d2link_df2)
|
||||
return d2logpdf_df2_dtheta
|
||||
#return chain_2(d2logpdf_dlink2_dtheta, dlink_df, dlogpdf_dlink_dtheta, d2link_df2)
|
||||
else:
|
||||
# There are no parameters so return an empty array for derivatives
|
||||
return np.zeros([f.shape[0], 0])
|
||||
return np.zeros((0, f.shape[0], f.shape[1]))
|
||||
|
||||
def _laplace_gradients(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dtheta = self.dlogpdf_dtheta(f, y, Y_metadata=Y_metadata)
|
||||
|
|
@ -431,9 +592,9 @@ class Likelihood(Parameterized):
|
|||
|
||||
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
|
||||
# ensure we have gradients for every parameter we want to optimize
|
||||
assert len(dlogpdf_dtheta) == self.size #1 x num_param array
|
||||
assert dlogpdf_df_dtheta.shape[1] == self.size #f x num_param matrix
|
||||
assert d2logpdf_df2_dtheta.shape[1] == self.size #f x num_param matrix
|
||||
assert dlogpdf_dtheta.shape[0] == self.size #num_param array x f, d
|
||||
assert dlogpdf_df_dtheta.shape[0] == self.size #num_param x f x d x matrix or just num_param x f
|
||||
assert d2logpdf_df2_dtheta.shape[0] == self.size #num_param x f matrix or num_param x f x d x matrix, num_param x f x f or num_param x f x f x d
|
||||
|
||||
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta
|
||||
|
||||
|
|
@ -454,19 +615,98 @@ class Likelihood(Parameterized):
|
|||
|
||||
def predictive_quantiles(self, mu, var, quantiles, Y_metadata=None):
|
||||
#compute the quantiles by sampling!!!
|
||||
N_samp = 1000
|
||||
N_samp = 500
|
||||
s = np.random.randn(mu.shape[0], N_samp)*np.sqrt(var) + mu
|
||||
#ss_f = s.flatten()
|
||||
#ss_y = self.samples(ss_f, Y_metadata)
|
||||
#ss_y = self.samples(s, Y_metadata, samples=100)
|
||||
ss_y = self.samples(s, Y_metadata)
|
||||
#ss_y = ss_y.reshape(mu.shape[0], N_samp)
|
||||
|
||||
return [np.percentile(ss_y ,q, axis=1)[:,None] for q in quantiles]
|
||||
|
||||
def samples(self, gp, Y_metadata=None):
|
||||
def samples(self, gp, Y_metadata=None, samples=1):
|
||||
"""
|
||||
Returns a set of samples of observations based on a given value of the latent variable.
|
||||
|
||||
:param gp: latent variable
|
||||
:param samples: number of samples to take for each f location
|
||||
"""
|
||||
raise NotImplementedError
|
||||
raise NotImplementedError("""May be possible to use MCMC with user-tuning, see
|
||||
MCMC_pdf_samples in likelihood.py and write samples function
|
||||
using this, beware this is a simple implementation
|
||||
of Metropolis and will not work well for all likelihoods""")
|
||||
|
||||
def MCMC_pdf_samples(self, fNew, num_samples=1000, starting_loc=None, stepsize=0.1, burn_in=1000, Y_metadata=None):
|
||||
"""
|
||||
Simple implementation of Metropolis sampling algorithm
|
||||
|
||||
Will run a parallel chain for each input dimension (treats each f independently)
|
||||
Thus assumes f*_1 independent of f*_2 etc.
|
||||
|
||||
:param num_samples: Number of samples to take
|
||||
:param fNew: f at which to sample around
|
||||
:param starting_loc: Starting locations of the independent chains (usually will be conditional_mean of likelihood), often link_f
|
||||
:param stepsize: Stepsize for the normal proposal distribution (will need modifying)
|
||||
:param burnin: number of samples to use for burnin (will need modifying)
|
||||
:param Y_metadata: Y_metadata for pdf
|
||||
"""
|
||||
print("Warning, using MCMC for sampling y*, needs to be tuned!")
|
||||
if starting_loc is None:
|
||||
starting_loc = fNew
|
||||
from functools import partial
|
||||
logpdf = partial(self.logpdf, f=fNew, Y_metadata=Y_metadata)
|
||||
pdf = lambda y_star: np.exp(logpdf(y=y_star[:, None]))
|
||||
#Should be the link function of f is a good starting point
|
||||
#(i.e. the point before you corrupt it with the likelihood)
|
||||
par_chains = starting_loc.shape[0]
|
||||
chain_values = np.zeros((par_chains, num_samples))
|
||||
chain_values[:, 0][:,None] = starting_loc
|
||||
#Use same stepsize for all par_chains
|
||||
stepsize = np.ones(par_chains)*stepsize
|
||||
accepted = np.zeros((par_chains, num_samples+burn_in))
|
||||
accept_ratio = np.zeros(num_samples+burn_in)
|
||||
#Whilst burning in, only need to keep the previous lot
|
||||
burnin_cache = np.zeros(par_chains)
|
||||
burnin_cache[:] = starting_loc.flatten()
|
||||
burning_in = True
|
||||
for i in xrange(burn_in+num_samples):
|
||||
next_ind = i-burn_in
|
||||
if burning_in:
|
||||
old_y = burnin_cache
|
||||
else:
|
||||
old_y = chain_values[:,next_ind-1]
|
||||
|
||||
old_lik = pdf(old_y)
|
||||
#Propose new y from Gaussian proposal
|
||||
new_y = np.random.normal(loc=old_y, scale=stepsize)
|
||||
new_lik = pdf(new_y)
|
||||
#Accept using Metropolis (not hastings) acceptance
|
||||
#Always accepts if new_lik > old_lik
|
||||
accept_probability = np.minimum(1, new_lik/old_lik)
|
||||
u = np.random.uniform(0,1,par_chains)
|
||||
#print "Accept prob: ", accept_probability
|
||||
accepts = u < accept_probability
|
||||
if burning_in:
|
||||
burnin_cache[accepts] = new_y[accepts]
|
||||
burnin_cache[~accepts] = old_y[~accepts]
|
||||
if i == burn_in:
|
||||
burning_in = False
|
||||
chain_values[:,0] = burnin_cache
|
||||
else:
|
||||
#If it was accepted then new_y becomes the latest sample
|
||||
chain_values[accepts, next_ind] = new_y[accepts]
|
||||
#Otherwise use old y as the sample
|
||||
chain_values[~accepts, next_ind] = old_y[~accepts]
|
||||
|
||||
accepted[~accepts, i] = 0
|
||||
accepted[accepts, i] = 1
|
||||
accept_ratio[i] = np.sum(accepted[:,i])/float(par_chains)
|
||||
|
||||
#Show progress
|
||||
if i % int((burn_in+num_samples)*0.1) == 0:
|
||||
print("{}% of samples taken ({})".format((i/int((burn_in+num_samples)*0.1)*10), i))
|
||||
print("Last run accept ratio: ", accept_ratio[i])
|
||||
|
||||
print("Average accept ratio: ", np.mean(accept_ratio))
|
||||
return chain_values
|
||||
|
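For reference, the acceptance rule used in MCMC_pdf_samples above is plain Metropolis: with a symmetric Gaussian proposal y' ~ N(y, stepsize^2) the Hastings correction cancels, and a move is accepted with probability

    \alpha = \min\!\Big(1,\ \frac{p(y'\,|\,f_*)}{p(y\,|\,f_*)}\Big),

so the step size is the only tuning knob, and the accept ratio printed during the run is the quantity to watch when adjusting it.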
|
|
|||
|
|
@ -1,13 +1,10 @@
|
|||
# Copyright (c) 2012-2014 The GPy authors (see AUTHORS.txt)
|
||||
# Copyright (c) 2012-2015 The GPy authors (see AUTHORS.txt)
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
|
||||
import scipy as sp
|
||||
from GPy.util.univariate_Gaussian import std_norm_pdf,std_norm_cdf,inv_std_norm_cdf
|
||||
|
||||
_exp_lim_val = np.finfo(np.float64).max
|
||||
_lim_val = np.log(_exp_lim_val)
|
||||
from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
|
||||
|
||||
class GPTransformation(object):
|
||||
"""
|
||||
|
|
@ -79,13 +76,10 @@ class Probit(GPTransformation):
|
|||
return std_norm_pdf(f)
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
#FIXME
|
||||
return -f * std_norm_pdf(f)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
#FIXME
|
||||
f2 = f**2
|
||||
return -(1/(np.sqrt(2*np.pi)))*np.exp(-0.5*(f2))*(1-f2)
|
||||
return (safe_square(f)-1.)*std_norm_pdf(f)
|
||||
|
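A quick check of the corrected third derivative above (for reference): with \Phi the probit transform and \varphi the standard normal pdf, \varphi'(f) = -f\,\varphi(f), so

    \frac{d\Phi}{df} = \varphi(f), \qquad
    \frac{d^2\Phi}{df^2} = -f\,\varphi(f), \qquad
    \frac{d^3\Phi}{df^3} = (f^2 - 1)\,\varphi(f),

which is the (safe_square(f) - 1.)*std_norm_pdf(f) form now used.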
||||
|
||||
class Cloglog(GPTransformation):
|
||||
|
|
@ -101,19 +95,23 @@ class Cloglog(GPTransformation):
|
|||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return 1-np.exp(-np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
return 1-np.exp(-ef)
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return np.exp(f-np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
return np.exp(f-ef)
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
ef = np.exp(f)
|
||||
ef = safe_exp(f)
|
||||
return -np.exp(f-ef)*(ef-1.)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
ef = np.exp(f)
|
||||
return np.exp(f-ef)*(1.-3*ef + ef**2)
|
||||
|
||||
ef = safe_exp(f)
|
||||
ef2 = safe_square(ef)
|
||||
three_times_ef = safe_three_times(ef)
|
||||
r_val = np.exp(f-ef)*(1.-three_times_ef + ef2)
|
||||
return r_val
|
||||
|
||||
class Log(GPTransformation):
|
||||
"""
|
||||
|
|
@ -123,16 +121,16 @@ class Log(GPTransformation):
|
|||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
||||
return safe_exp(f)
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
||||
return safe_exp(f)
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
||||
return safe_exp(f)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
return np.exp(np.clip(f, -_lim_val, _lim_val))
|
||||
return safe_exp(f)
|
||||
|
||||
class Log_ex_1(GPTransformation):
|
||||
"""
|
||||
|
|
@ -142,17 +140,20 @@ class Log_ex_1(GPTransformation):
|
|||
|
||||
"""
|
||||
def transf(self,f):
|
||||
return np.log(1.+np.exp(f))
|
||||
return np.log1p(safe_exp(f))
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return np.exp(f)/(1.+np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
return ef/(1.+ef)
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
aux = np.exp(f)/(1.+np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
aux = ef/(1.+ef)
|
||||
return aux*(1.-aux)
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
aux = np.exp(f)/(1.+np.exp(f))
|
||||
ef = safe_exp(f)
|
||||
aux = ef/(1.+ef)
|
||||
daux_df = aux*(1.-aux)
|
||||
return daux_df - (2.*aux*daux_df)
|
||||
|
||||
|
|
@ -160,21 +161,24 @@ class Reciprocal(GPTransformation):
|
|||
def transf(self,f):
|
||||
return 1./f
|
||||
|
||||
def dtransf_df(self,f):
|
||||
return -1./(f**2)
|
||||
def dtransf_df(self, f):
|
||||
f2 = safe_square(f)
|
||||
return -1./f2
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
return 2./(f**3)
|
||||
def d2transf_df2(self, f):
|
||||
f3 = safe_cube(f)
|
||||
return 2./f3
|
||||
|
||||
def d3transf_df3(self,f):
|
||||
return -6./(f**4)
|
||||
f4 = safe_quad(f)
|
||||
return -6./f4
|
||||
|
||||
class Heaviside(GPTransformation):
|
||||
"""
|
||||
|
||||
.. math::
|
||||
|
||||
g(f) = I_{x \\in A}
|
||||
g(f) = I_{x \\geq 0}
|
||||
|
||||
"""
|
||||
def transf(self,f):
|
||||
|
|
@ -182,7 +186,7 @@ class Heaviside(GPTransformation):
|
|||
return np.where(f>0, 1, 0)
|
||||
|
||||
def dtransf_df(self,f):
|
||||
raise NotImplementedError, "This function is not differentiable!"
|
||||
raise NotImplementedError("This function is not differentiable!")
|
||||
|
||||
def d2transf_df2(self,f):
|
||||
raise NotImplementedError, "This function is not differentiable!"
|
||||
raise NotImplementedError("This function is not differentiable!")
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@
|
|||
|
||||
import numpy as np
|
||||
from scipy import stats, special
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from gaussian import Gaussian
|
||||
from . import link_functions
|
||||
from .likelihood import Likelihood
|
||||
from .gaussian import Gaussian
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
from ..core.parameterization import Parameterized
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@ from __future__ import division
|
|||
import numpy as np
|
||||
from scipy import stats,special
|
||||
import scipy as sp
|
||||
import link_functions
|
||||
from likelihood import Likelihood
|
||||
from . import link_functions
|
||||
from .likelihood import Likelihood
|
||||
|
||||
class Poisson(Likelihood):
|
||||
"""
|
||||
|
|
@ -122,7 +122,6 @@ class Poisson(Likelihood):
|
|||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(link_f).shape == np.atleast_1d(y).shape
|
||||
d3lik_dlink3 = 2*y/(link_f)**3
|
||||
return d3lik_dlink3
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,13 @@
|
|||
import numpy as np
|
||||
from scipy import stats, special
|
||||
import scipy as sp
|
||||
import link_functions
|
||||
from . import link_functions
|
||||
from scipy import stats, integrate
|
||||
from scipy.special import gammaln, gamma
|
||||
from likelihood import Likelihood
|
||||
from .likelihood import Likelihood
|
||||
from ..core.parameterization import Param
|
||||
from ..core.parameterization.transformations import Logexp
|
||||
from scipy.special import psi as digamma
|
||||
|
||||
class StudentT(Likelihood):
|
||||
"""
|
||||
|
|
@ -28,16 +29,13 @@ class StudentT(Likelihood):
|
|||
super(StudentT, self).__init__(gp_link, name='Student_T')
|
||||
# sigma2 is not a noise parameter, it is a squared scale.
|
||||
self.sigma2 = Param('t_scale2', float(sigma2), Logexp())
|
||||
self.v = Param('deg_free', float(deg_free))
|
||||
self.v = Param('deg_free', float(deg_free), Logexp())
|
||||
self.link_parameter(self.sigma2)
|
||||
self.link_parameter(self.v)
|
||||
self.v.constrain_fixed()
|
||||
#self.v.constrain_fixed()
|
||||
|
||||
self.log_concave = False
|
||||
|
||||
def parameters_changed(self):
|
||||
self.variance = (self.v / float(self.v - 2)) * self.sigma2
|
||||
|
||||
def update_gradients(self, grads):
|
||||
"""
|
||||
Pull out the gradients, be careful as the order must match the order
|
||||
|
|
@ -86,7 +84,6 @@ class StudentT(Likelihood):
|
|||
:rtype: float
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
#FIXME:
|
||||
#Why does np.log(1 + (1/self.v)*((y-inv_link_f)**2)/self.sigma2) suppress the divide by zero?!
|
||||
|
|
@ -97,7 +94,7 @@ class StudentT(Likelihood):
|
|||
- 0.5*np.log(self.sigma2 * self.v * np.pi)
|
||||
- 0.5*(self.v + 1)*np.log(1 + (1/np.float(self.v))*((e**2)/self.sigma2))
|
||||
)
|
||||
return np.sum(objective)
|
||||
return objective
|
||||
|
||||
def dlogpdf_dlink(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -115,7 +112,6 @@ class StudentT(Likelihood):
|
|||
:rtype: Nx1 array
|
||||
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
grad = ((self.v + 1) * e) / (self.v * self.sigma2 + (e**2))
|
||||
return grad
|
||||
|
|
@ -141,7 +137,6 @@ class StudentT(Likelihood):
|
|||
Will return diagonal of hessian, since every where else it is 0, as the likelihood factorizes over cases
|
||||
(the distribution for y_i depends only on link(f_i) not on link(f_(j!=i))
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
hess = ((self.v + 1)*(e**2 - self.v*self.sigma2)) / ((self.sigma2*self.v + e**2)**2)
|
||||
return hess
|
||||
|
|
@ -161,7 +156,6 @@ class StudentT(Likelihood):
|
|||
:returns: third derivative of likelihood evaluated at points f
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d3lik_dlink3 = ( -(2*(self.v + 1)*(-e)*(e**2 - 3*self.v*self.sigma2)) /
|
||||
((e**2 + self.sigma2*self.v)**3)
|
||||
|
|
@ -183,10 +177,10 @@ class StudentT(Likelihood):
|
|||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: float
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dvar = self.v*(e**2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e**2))
|
||||
return np.sum(dlogpdf_dvar)
|
||||
e2 = np.square(e)
|
||||
dlogpdf_dvar = self.v*(e2 - self.sigma2)/(2*self.sigma2*(self.sigma2*self.v + e2))
|
||||
return dlogpdf_dvar
|
||||
|
||||
def dlogpdf_dlink_dvar(self, inv_link_f, y, Y_metadata=None):
|
||||
"""
|
||||
|
|
@ -203,7 +197,6 @@ class StudentT(Likelihood):
|
|||
:returns: derivative of likelihood evaluated at points f w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
dlogpdf_dlink_dvar = (self.v*(self.v+1)*(-e))/((self.sigma2*self.v + e**2)**2)
|
||||
return dlogpdf_dlink_dvar
|
||||
|
|
@ -223,27 +216,53 @@ class StudentT(Likelihood):
|
|||
:returns: derivative of hessian evaluated at points f and f_j w.r.t variance parameter
|
||||
:rtype: Nx1 array
|
||||
"""
|
||||
assert np.atleast_1d(inv_link_f).shape == np.atleast_1d(y).shape
|
||||
e = y - inv_link_f
|
||||
d2logpdf_dlink2_dvar = ( (self.v*(self.v+1)*(self.sigma2*self.v - 3*(e**2)))
|
||||
/ ((self.sigma2*self.v + (e**2))**3)
|
||||
)
|
||||
return d2logpdf_dlink2_dvar
|
||||
|
||||
def dlogpdf_link_dv(self, inv_link_f, y, Y_metadata=None):
|
||||
e = y - inv_link_f
|
||||
e2 = np.square(e)
|
||||
df = float(self.v[:])
|
||||
s2 = float(self.sigma2[:])
|
||||
dlogpdf_dv = 0.5*digamma(0.5*(df+1)) - 0.5*digamma(0.5*df) - 1.0/(2*df)
|
||||
dlogpdf_dv += 0.5*(df+1)*e2/(df*(e2 + s2*df))
|
||||
dlogpdf_dv -= 0.5*np.log1p(e2/(s2*df))
|
||||
return dlogpdf_dv
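A quick way to validate this expression is a central finite difference of the log-density with respect to the degrees of freedom. The following is a self-contained numpy sketch of the same formula, not code from the repository:

import numpy as np
from scipy.special import digamma, gammaln

def logpdf(e, v, s2):
    # Student-t log-density in terms of the residual e, dof v and squared scale s2
    return (gammaln(0.5 * (v + 1)) - gammaln(0.5 * v)
            - 0.5 * np.log(s2 * v * np.pi)
            - 0.5 * (v + 1) * np.log1p(e**2 / (s2 * v)))

def dlogpdf_dv(e, v, s2):
    # same expression as dlogpdf_link_dv above
    e2 = e**2
    return (0.5 * digamma(0.5 * (v + 1)) - 0.5 * digamma(0.5 * v) - 1.0 / (2 * v)
            + 0.5 * (v + 1) * e2 / (v * (e2 + s2 * v))
            - 0.5 * np.log1p(e2 / (s2 * v)))

e, v, s2, h = 0.7, 4.0, 0.3, 1e-6
numeric = (logpdf(e, v + h, s2) - logpdf(e, v - h, s2)) / (2 * h)
assert np.isclose(dlogpdf_dv(e, v, s2), numeric)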
|
||||
|
||||
def dlogpdf_dlink_dv(self, inv_link_f, y, Y_metadata=None):
|
||||
e = y - inv_link_f
|
||||
e2 = np.square(e)
|
||||
df = float(self.v[:])
|
||||
s2 = float(self.sigma2[:])
|
||||
dlogpdf_df_dv = e*(e2 - self.sigma2)/(e2 + s2*df)**2
|
||||
return dlogpdf_df_dv
|
||||
|
||||
def d2logpdf_dlink2_dv(self, inv_link_f, y, Y_metadata=None):
|
||||
e = y - inv_link_f
|
||||
e2 = np.square(e)
|
||||
df = float(self.v[:])
|
||||
s2 = float(self.sigma2[:])
|
||||
e2_s2v = e**2 + s2*df
|
||||
d2logpdf_df2_dv = (-s2*(df+1) + e2 - s2*df)/e2_s2v**2 - 2*s2*(df+1)*(e2 - s2*df)/e2_s2v**3
|
||||
return d2logpdf_df2_dv
|
||||
|
||||
def dlogpdf_link_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dvar = self.dlogpdf_link_dvar(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dv = np.zeros_like(dlogpdf_dvar) #FIXME: Not done yet
|
||||
return np.hstack((dlogpdf_dvar, dlogpdf_dv))
|
||||
dlogpdf_dv = self.dlogpdf_link_dv(f, y, Y_metadata=Y_metadata)
|
||||
return np.array((dlogpdf_dvar, dlogpdf_dv))
|
||||
|
||||
def dlogpdf_dlink_dtheta(self, f, y, Y_metadata=None):
|
||||
dlogpdf_dlink_dvar = self.dlogpdf_dlink_dvar(f, y, Y_metadata=Y_metadata)
|
||||
dlogpdf_dlink_dv = np.zeros_like(dlogpdf_dlink_dvar) #FIXME: Not done yet
|
||||
return np.hstack((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
|
||||
dlogpdf_dlink_dv = self.dlogpdf_dlink_dv(f, y, Y_metadata=Y_metadata)
|
||||
return np.array((dlogpdf_dlink_dvar, dlogpdf_dlink_dv))
|
||||
|
||||
def d2logpdf_dlink2_dtheta(self, f, y, Y_metadata=None):
|
||||
d2logpdf_dlink2_dvar = self.d2logpdf_dlink2_dvar(f, y, Y_metadata=Y_metadata)
|
||||
d2logpdf_dlink2_dv = np.zeros_like(d2logpdf_dlink2_dvar) #FIXME: Not done yet
|
||||
return np.hstack((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
|
||||
d2logpdf_dlink2_dv = self.d2logpdf_dlink2_dv(f, y, Y_metadata=Y_metadata)
|
||||
return np.array((d2logpdf_dlink2_dvar, d2logpdf_dlink2_dv))
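These *_dtheta methods stack the sigma2 and deg_free pieces in the order the parameters were linked in __init__ (sigma2 first, then v), so callers can index the result positionally. A hypothetical illustration (lik, f and y are assumed to exist; the shape follows from the Nx1 pieces above):

grads = lik.dlogpdf_link_dtheta(f, y)   # shape (2, N, 1)
dL_dsigma2 = grads[0]                   # gradient w.r.t. the squared scale
dL_ddeg_free = grads[1]                 # gradient w.r.t. the degrees of freedom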
|
||||
|
||||
def predictive_mean(self, mu, sigma, Y_metadata=None):
|
||||
# The comment here confuses mean and median.
|
||||
|
|
|
|||
|
GPy/mappings/__init__.py
|
@ -1,7 +1,10 @@
|
|||
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernel import Kernel
|
||||
from linear import Linear
|
||||
from mlp import MLP
|
||||
#from rbf import RBF
|
||||
from .kernel import Kernel
|
||||
from .linear import Linear
|
||||
from .mlp import MLP
|
||||
from .additive import Additive
|
||||
from .compound import Compound
|
||||
from .constant import Constant
|
||||
|
||||
|
|
|
|||
|
GPy/mappings/additive.py
|
@ -2,8 +2,7 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core.mapping import Mapping
|
||||
import GPy
|
||||
from ..core import Mapping
|
||||
|
||||
class Additive(Mapping):
|
||||
"""
|
||||
|
|
@ -17,45 +16,23 @@ class Additive(Mapping):
|
|||
:type mapping1: GPy.mappings.Mapping
|
||||
:param mapping2: second mapping to add together.
|
||||
:type mapping2: GPy.mappings.Mapping
|
||||
:param tensor: whether or not to use the tensor product of input spaces
|
||||
:type tensor: bool
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, mapping1, mapping2, tensor=False):
|
||||
if tensor:
|
||||
input_dim = mapping1.input_dim + mapping2.input_dim
|
||||
else:
|
||||
input_dim = mapping1.input_dim
|
||||
def __init__(self, mapping1, mapping2):
|
||||
assert(mapping1.input_dim==mapping2.input_dim)
|
||||
assert(mapping1.output_dim==mapping2.output_dim)
|
||||
output_dim = mapping1.output_dim
|
||||
input_dim, output_dim = mapping1.input_dim, mapping1.output_dim
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
|
||||
self.mapping1 = mapping1
|
||||
self.mapping2 = mapping2
|
||||
self.num_params = self.mapping1.num_params + self.mapping2.num_params
|
||||
self.name = self.mapping1.name + '+' + self.mapping2.name
|
||||
def _get_param_names(self):
|
||||
return self.mapping1._get_param_names + self.mapping2._get_param_names
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.mapping1._get_params(), self.mapping2._get_params()))
|
||||
|
||||
def _set_params(self, x):
|
||||
self.mapping1._set_params(x[:self.mapping1.num_params])
|
||||
self.mapping2._set_params(x[self.mapping1.num_params:])
|
||||
|
||||
def randomize(self):
|
||||
self.mapping1._randomize()
|
||||
self.mapping2._randomize()
|
||||
|
||||
def f(self, X):
|
||||
return self.mapping1.f(X) + self.mapping2.f(X)
|
||||
|
||||
def df_dtheta(self, dL_df, X):
|
||||
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
|
||||
self._df_dbias = (dL_df.sum(0))
|
||||
return np.hstack((self._df_dA.flatten(), self._df_dbias))
|
||||
def update_gradients(self, dL_dF, X):
|
||||
self.mapping1.update_gradients(dL_dF, X)
|
||||
self.mapping2.update_gradients(dL_dF, X)
|
||||
|
||||
def df_dX(self, dL_df, X):
|
||||
return self.kern.dK_dX((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return self.mapping1.gradients_X(dL_dF, X) + self.mapping2.gradients_X(dL_dF, X)
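For reference, the rewritten class simply sums two mappings with matching input and output dimensions. A small usage sketch, assuming the mappings are exposed as GPy.mappings.* per the __init__ changes above (dimensions and values are arbitrary):

import numpy as np
import GPy

m1 = GPy.mappings.Linear(input_dim=2, output_dim=1)
m2 = GPy.mappings.Constant(input_dim=2, output_dim=1, value=3.)
add = GPy.mappings.Additive(m1, m2)

X = np.random.randn(10, 2)
F = add.f(X)                 # equals m1.f(X) + m2.f(X), shape (10, 1)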
|
||||
|
|
|
|||
39
GPy/mappings/compound.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# Copyright (c) 2015, James Hensman and Alan Saul
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from ..core import Mapping
|
||||
|
||||
class Compound(Mapping):
|
||||
"""
|
||||
Mapping based on passing one mapping through another
|
||||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}) = f_2(f_1(\mathbf{x}))
|
||||
|
||||
:param mapping1: first mapping
|
||||
:type mapping1: GPy.mappings.Mapping
|
||||
:param mapping2: second mapping
|
||||
:type mapping2: GPy.mappings.Mapping
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, mapping1, mapping2):
|
||||
assert(mapping1.output_dim==mapping2.input_dim)
|
||||
input_dim, output_dim = mapping1.input_dim, mapping2.output_dim
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim)
|
||||
self.mapping1 = mapping1
|
||||
self.mapping2 = mapping2
|
||||
self.link_parameters(self.mapping1, self.mapping2)
|
||||
|
||||
def f(self, X):
|
||||
return self.mapping2.f(self.mapping1.f(X))
|
||||
|
||||
def update_gradients(self, dL_dF, X):
|
||||
hidden = self.mapping1.f(X)
|
||||
self.mapping2.update_gradients(dL_dF, hidden)
|
||||
self.mapping1.update_gradients(self.mapping2.gradients_X(dL_dF, hidden), X)
|
||||
|
||||
def gradients_X(self, dL_dF, X):
|
||||
hidden = self.mapping1.f(X)
|
||||
return self.mapping1.gradients_X(self.mapping2.gradients_X(dL_dF, hidden), X)
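The composition applies the chain rule: mapping2 is differentiated at the hidden values f_1(X), and its input gradient is pushed back through mapping1. A small sketch with two linear maps (dimensions are arbitrary; this assumes the GPy.mappings exports introduced above):

import numpy as np
import GPy

inner = GPy.mappings.Linear(input_dim=3, output_dim=2)
outer = GPy.mappings.Linear(input_dim=2, output_dim=1)
comp = GPy.mappings.Compound(inner, outer)     # f(x) = outer(inner(x))

X = np.random.randn(5, 3)
assert np.allclose(comp.f(X), outer.f(inner.f(X)))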
|
||||
40
GPy/mappings/constant.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# Copyright (c) 2015, James Hensman, Alan Saul
|
||||
import numpy as np
|
||||
from ..core.mapping import Mapping
|
||||
from ..core.parameterization import Param
|
||||
|
||||
class Constant(Mapping):
|
||||
"""
|
||||
A constant mapping.
|
||||
|
||||
.. math::
|
||||
|
||||
F(\mathbf{x}) = c
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
:type input_dim: int
|
||||
:param output_dim: dimension of output.
|
||||
:type output_dim: int
|
||||
:param value: the value of this constant mapping
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, output_dim, value=0., name='constmap'):
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
||||
value = np.atleast_1d(value)
|
||||
if not len(value.shape) ==1:
|
||||
raise ValueError("bad constant values: pass a float or flat vector")
|
||||
elif value.size==1:
|
||||
value = np.ones(self.output_dim)*value
|
||||
self.C = Param('C', value)
|
||||
self.link_parameter(self.C)
|
||||
|
||||
def f(self, X):
|
||||
return np.tile(self.C.values[None,:], (X.shape[0], 1))
|
||||
|
||||
def update_gradients(self, dL_dF, X):
|
||||
self.C.gradient = dL_dF.sum(0)
|
||||
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return np.zeros_like(X)
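A short usage sketch of the constant mapping (values are arbitrary): every row of f(X) is the constant vector C, and its gradient is just the column sum of the incoming gradient:

import numpy as np
import GPy

const = GPy.mappings.Constant(input_dim=2, output_dim=3, value=1.5)
X = np.random.randn(4, 2)
F = const.f(X)                              # (4, 3) array, every entry 1.5
const.update_gradients(np.ones((4, 3)), X)
print(const.C.gradient)                     # column sums of dL_dF: [4. 4. 4.]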
|
||||
26
GPy/mappings/identity.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
# Copyright (c) 2015, James Hensman
|
||||
|
||||
from ..core.mapping import Mapping
|
||||
from ..core import Param
|
||||
|
||||
class Identity(Mapping):
|
||||
"""
|
||||
A mapping that does nothing!
|
||||
"""
|
||||
def __init__(self, input_dim, output_dim, name='identity'):
|
||||
Mapping.__init__(self, input_dim, output_dim, name)
|
||||
|
||||
def f(self, X):
|
||||
return X
|
||||
|
||||
def update_gradients(self, dL_dF, X):
|
||||
pass
|
||||
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return dL_dF
|
||||
GPy/mappings/kernel.py
|
|
@ -1,9 +1,10 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2015, James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core.mapping import Mapping
|
||||
import GPy
|
||||
from ..core import Param
|
||||
|
||||
class Kernel(Mapping):
|
||||
"""
|
||||
|
|
@ -11,50 +12,41 @@ class Kernel(Mapping):
|
|||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}*) = \mathbf{A}\mathbf{k}(\mathbf{X}, \mathbf{x}^*) + \mathbf{b}
|
||||
f(\mathbf{x}) = \sum_i \alpha_i k(\mathbf{z}_i, \mathbf{x})
|
||||
|
||||
:param X: input observations containing :math:`\mathbf{X}`
|
||||
:type X: ndarray
|
||||
or for multple outputs
|
||||
|
||||
.. math::
|
||||
|
||||
f_i(\mathbf{x}) = \sum_j \alpha_{i,j} k(\mathbf{z}_i, \mathbf{x})
|
||||
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
:type input_dim: int
|
||||
:param output_dim: dimension of output.
|
||||
:type output_dim: int
|
||||
:param Z: input observations containing :math:`\mathbf{Z}`
|
||||
:type Z: ndarray
|
||||
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
|
||||
:type kernel: GPy.kern.kern
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, X, output_dim=1, kernel=None):
|
||||
Mapping.__init__(self, input_dim=X.shape[1], output_dim=output_dim)
|
||||
if kernel is None:
|
||||
kernel = GPy.kern.RBF(self.input_dim)
|
||||
def __init__(self, input_dim, output_dim, Z, kernel, name='kernmap'):
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
||||
self.kern = kernel
|
||||
self.X = X
|
||||
self.num_data = X.shape[0]
|
||||
self.num_params = self.output_dim*(self.num_data + 1)
|
||||
self.A = np.array((self.num_data, self.output_dim))
|
||||
self.bias = np.array(self.output_dim)
|
||||
self.randomize()
|
||||
self.name = 'kernel'
|
||||
def _get_param_names(self):
|
||||
return sum([['A_%i_%i' % (n, d) for d in range(self.output_dim)] for n in range(self.num_data)], []) + ['bias_%i' % d for d in range(self.output_dim)]
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.A.flatten(), self.bias))
|
||||
|
||||
def _set_params(self, x):
|
||||
self.A = x[:self.num_data * self.output_dim].reshape(self.num_data, self.output_dim).copy()
|
||||
self.bias = x[self.num_data*self.output_dim:].copy()
|
||||
|
||||
def randomize(self):
|
||||
self.A = np.random.randn(self.num_data, self.output_dim)/np.sqrt(self.num_data+1)
|
||||
self.bias = np.random.randn(self.output_dim)/np.sqrt(self.num_data+1)
|
||||
self.Z = Z
|
||||
self.num_bases, Zdim = Z.shape
|
||||
assert Zdim == self.input_dim
|
||||
self.A = Param('A', np.random.randn(self.num_bases, self.output_dim))
|
||||
self.link_parameter(self.A)
|
||||
|
||||
def f(self, X):
|
||||
return np.dot(self.kern.K(X, self.X),self.A) + self.bias
|
||||
return np.dot(self.kern.K(X, self.Z), self.A)
|
||||
|
||||
def df_dtheta(self, dL_df, X):
|
||||
self._df_dA = (dL_df[:, :, None]*self.kern.K(X, self.X)[:, None, :]).sum(0).T
|
||||
self._df_dbias = (dL_df.sum(0))
|
||||
return np.hstack((self._df_dA.flatten(), self._df_dbias))
|
||||
def update_gradients(self, dL_dF, X):
|
||||
self.kern.update_gradients_full(np.dot(dL_dF, self.A.T), X, self.Z)
|
||||
self.A.gradient = np.dot( self.kern.K(self.Z, X), dL_dF)
|
||||
|
||||
def df_dX(self, dL_df, X):
|
||||
return self.kern.gradients_X((dL_df[:, None, :]*self.A[None, :, :]).sum(2), X, self.X)
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return self.kern.gradients_X(np.dot(dL_dF, self.A.T), X, self.Z)
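The rewritten mapping is a fixed-basis linear model in kernel space: each output is a weighted sum of kernel evaluations against the basis points Z, with weights A. A small sketch with an RBF kernel (data and dimensions are arbitrary, and assume the GPy.mappings.Kernel export above):

import numpy as np
import GPy

Z = np.random.randn(8, 2)                       # basis points
kmap = GPy.mappings.Kernel(input_dim=2, output_dim=1,
                           Z=Z, kernel=GPy.kern.RBF(2))
X = np.random.randn(20, 2)
F = kmap.f(X)                                   # K(X, Z) A, shape (20, 1)
assert np.allclose(F, kmap.kern.K(X, Z).dot(kmap.A))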
|
||||
|
GPy/mappings/linear.py
|
|||
|
|
@ -1,43 +1,39 @@
|
|||
# Copyright (c) 2013, 2014 GPy authors (see AUTHORS.txt).
|
||||
# Copyright (c) 2015, James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy as np
|
||||
from ..core.mapping import Bijective_mapping
|
||||
from ..core.mapping import Mapping
|
||||
from ..core.parameterization import Param
|
||||
|
||||
class Linear(Bijective_mapping):
|
||||
class Linear(Mapping):
|
||||
"""
|
||||
Mapping based on a linear model.
|
||||
A Linear mapping.
|
||||
|
||||
.. math::
|
||||
|
||||
f(\mathbf{x}*) = \mathbf{W}\mathbf{x}^* + \mathbf{b}
|
||||
F(\mathbf{x}) = \mathbf{A} \mathbf{x}
|
||||
|
||||
:param X: input observations
|
||||
:type X: ndarray
|
||||
|
||||
:param input_dim: dimension of input.
|
||||
:type input_dim: int
|
||||
:param output_dim: dimension of output.
|
||||
:type output_dim: int
|
||||
:param kernel: a GPy kernel, defaults to GPy.kern.RBF
|
||||
:type kernel: GPy.kern.kern
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim=1, output_dim=1, name='linear'):
|
||||
Bijective_mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
||||
self.W = Param('W',np.array((self.input_dim, self.output_dim)))
|
||||
self.bias = Param('bias',np.array(self.output_dim))
|
||||
self.link_parameters(self.W, self.bias)
|
||||
def __init__(self, input_dim, output_dim, name='linmap'):
|
||||
Mapping.__init__(self, input_dim=input_dim, output_dim=output_dim, name=name)
|
||||
self.A = Param('A', np.random.randn(self.input_dim, self.output_dim))
|
||||
self.link_parameter(self.A)
|
||||
|
||||
def f(self, X):
|
||||
return np.dot(X,self.W) + self.bias
|
||||
return np.dot(X, self.A)
|
||||
|
||||
def g(self, f):
|
||||
V = np.linalg.solve(np.dot(self.W.T, self.W), W.T)
|
||||
return np.dot(f-self.bias, V)
|
||||
def update_gradients(self, dL_dF, X):
|
||||
self.A.gradient = np.dot( X.T, dL_dF)
|
||||
|
||||
def df_dtheta(self, dL_df, X):
|
||||
df_dW = (dL_df[:, :, None]*X[:, None, :]).sum(0).T
|
||||
df_dbias = (dL_df.sum(0))
|
||||
return np.hstack((df_dW.flatten(), df_dbias))
|
||||
|
||||
def dL_dX(self, partial, X):
|
||||
"""The gradient of L with respect to the inputs to the mapping, where L is a function that is dependent on the output of the mapping, f."""
|
||||
return (partial[:, None, :]*self.W[None, :, :]).sum(2)
|
||||
def gradients_X(self, dL_dF, X):
|
||||
return np.dot(dL_dF, self.A.T)
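Since F = X A, the gradient of a scalar loss with respect to A is X^T (dL/dF) and with respect to X is (dL/dF) A^T, which is what the two methods above compute. A standalone finite-difference check of one entry of the A gradient (a sketch, not a repository test):

import numpy as np
import GPy

lin = GPy.mappings.Linear(input_dim=3, output_dim=2)
X = np.random.randn(6, 3)
W = np.random.randn(6, 2)             # fixed weights; take L = sum(W * f(X))
lin.update_gradients(W, X)            # for this loss, dL/dF is simply W

i, j, h = 1, 0, 1e-6
A_plus, A_minus = lin.A.values.copy(), lin.A.values.copy()
A_plus[i, j] += h
A_minus[i, j] -= h
numeric = (np.sum(W * X.dot(A_plus)) - np.sum(W * X.dot(A_minus))) / (2 * h)
assert np.isclose(lin.A.gradient[i, j], numeric)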
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff.