manual merging with AS

This commit is contained in:
James Hensman 2015-04-16 12:45:04 +01:00
commit e88b8a88d1
16 changed files with 724 additions and 91 deletions

View file

@ -6,14 +6,13 @@ import sys
from .. import kern
from .model import Model
from .parameterization import ObsAr
from .model import Model
from .mapping import Mapping
from .parameterization import ObsAr
from .. import likelihoods
from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
from .parameterization.variational import VariationalPosterior
import logging
import warnings
from GPy.util.normalizer import MeanNorm
logger = logging.getLogger("GP")
@ -65,10 +64,14 @@ class GP(Model):
self.Y = ObsAr(Y)
self.Y_normalized = self.Y
assert Y.shape[0] == self.num_data
if Y.shape[0] != self.num_data:
#There can be cases where we want inputs than outputs, for example if we have multiple latent
#function values
warnings.warn("There are more rows in your input data X, \
than in your output data Y, be VERY sure this is what you want")
_, self.output_dim = self.Y.shape
#TODO: check the type of this is okay?
assert ((Y_metadata is None) or isinstance(Y_metadata, dict))
self.Y_metadata = Y_metadata
assert isinstance(kernel, kern.Kern)
@ -296,7 +299,7 @@ class GP(Model):
:type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:returns: Ysim: set of simulations
:returns: fsim: set of simulations
:rtype: np.ndarray (N x samples)
"""
m, v = self._raw_predict(X, full_cov=full_cov)
@ -304,11 +307,11 @@ class GP(Model):
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
if not full_cov:
Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
fsim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
else:
Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
fsim = np.random.multivariate_normal(m.flatten(), v, size).T
return Ysim
return fsim
def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
"""
@ -324,16 +327,16 @@ class GP(Model):
:type noise_model: integer.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
Ysim = self.likelihood.samples(Ysim, Y_metadata)
fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
Ysim = self.likelihood.samples(fsim, Y_metadata)
return Ysim
def plot_f(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=True,
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx',
apply_link=False):
"""
Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
This is a call to plot with plot_raw=True.
@ -370,6 +373,8 @@ class GP(Model):
:type Y_metadata: dict
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
:param apply_link: if there is a link function of the likelihood, plot the link(f*) rather than f*
:type apply_link: boolean
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
@ -382,7 +387,7 @@ class GP(Model):
which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, **kw)
data_symbol=data_symbol, apply_link=apply_link, **kw)
def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],

View file

@ -256,7 +256,7 @@ class Model(Parameterized):
else:
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, max_iters=max_iters, **kwargs)
with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook) as vo:
opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
vo.finish(opt)
@ -371,7 +371,12 @@ class Model(Parameterized):
f1 = self._objective(xx)
xx[xind] -= 2.*step
f2 = self._objective(xx)
df_ratio = np.abs((f1 - f2) / min(f1, f2))
#Avoid divide by zero, if any of the values are above 1e-15, otherwise both values are essentiall
#the same
if f1 > 1e-15 or f1 < -1e-15 or f2 > 1e-15 or f2 < -1e-15:
df_ratio = np.abs((f1 - f2) / min(f1, f2))
else:
df_ratio = 1.0
df_unstable = df_ratio < df_tolerance
numerical_gradient = (f1 - f2) / (2 * step)
if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]

View file

@ -64,3 +64,17 @@ class ExactGaussianInference(LatentFunctionInference):
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None):
"""
Leave one out error as found in
"Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
Vehtari et al. 2014.
"""
g = posterior.woodbury_vector
c = posterior.woodbury_inv
c_diag = np.diag(c)[:, None]
neg_log_marginal_LOO = 0.5*np.log(2*np.pi) - 0.5*np.log(c_diag) + 0.5*(g**2)/c_diag
#believe from Predictive Approaches for Choosing Hyperparameters in Gaussian Processes
#this is the negative marginal LOO
return -neg_log_marginal_LOO

View file

@ -19,6 +19,7 @@ def warning_on_one_line(message, category, filename, lineno, file=None, line=Non
warnings.formatwarning = warning_on_one_line
from scipy import optimize
from . import LatentFunctionInference
from scipy.integrate import quad
class Laplace(LatentFunctionInference):
@ -39,6 +40,85 @@ class Laplace(LatentFunctionInference):
self.first_run = True
self._previous_Ki_fhat = None
def LOO(self, kern, X, Y, likelihood, posterior, Y_metadata=None, K=None, f_hat=None, W=None, Ki_W_i=None):
"""
Leave one out log predictive density as found in
"Bayesian leave-one-out cross-validation approximations for Gaussian latent variable models"
Vehtari et al. 2014.
"""
Ki_f_init = np.zeros_like(Y)
if K is None:
K = kern.K(X)
if f_hat is None:
f_hat, _ = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
if W is None:
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
if Ki_W_i is None:
_, _, _, Ki_W_i = self._compute_B_statistics(K, W, likelihood.log_concave)
logpdf_dfhat = likelihood.dlogpdf_df(f_hat, Y, Y_metadata=Y_metadata)
if W.shape[1] == 1:
W = np.diagflat(W)
#Eq 14, and 16
var_site = 1./np.diag(W)[:, None]
mu_site = f_hat + var_site*logpdf_dfhat
prec_site = 1./var_site
#Eq 19
marginal_cov = Ki_W_i
marginal_mu = marginal_cov.dot(np.diagflat(prec_site)).dot(mu_site)
marginal_var = np.diag(marginal_cov)[:, None]
#Eq 30 with using site parameters instead of Gaussian site parameters
#(var_site instead of sigma^{2} )
posterior_cav_var = 1./(1./marginal_var - 1./var_site)
posterior_cav_mean = posterior_cav_var*((1./marginal_var)*marginal_mu - (1./var_site)*Y)
flat_y = Y.flatten()
flat_mu = posterior_cav_mean.flatten()
flat_var = posterior_cav_var.flatten()
if Y_metadata is not None:
#Need to zip individual elements of Y_metadata aswell
Y_metadata_flat = {}
if Y_metadata is not None:
for key, val in Y_metadata.items():
Y_metadata_flat[key] = np.atleast_1d(val).reshape(-1, 1)
zipped_values = []
for i in range(Y.shape[0]):
y_m = {}
for key, val in Y_metadata_flat.items():
if np.isscalar(val) or val.shape[0] == 1:
y_m[key] = val
else:
#Won't broadcast yet
y_m[key] = val[i]
zipped_values.append((flat_y[i], flat_mu[i], flat_var[i], y_m))
else:
#Otherwise just pass along None's
zipped_values = zip(flat_y, flat_mu, flat_var, [None]*Y.shape[0])
def integral_generator(yi, mi, vi, yi_m):
def f(fi_star):
#More stable in the log space
p_fi = np.exp(likelihood.logpdf(fi_star, yi, yi_m)
- 0.5*np.log(2*np.pi*vi)
- 0.5*np.square(mi-fi_star)/vi)
return p_fi
return f
#Eq 30
p_ystar, _ = zip(*[quad(integral_generator(y, m, v, yi_m), -np.inf, np.inf)
for y, m, v, yi_m in zipped_values])
p_ystar = np.array(p_ystar).reshape(-1, 1)
return np.log(p_ystar)
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
"""
Returns a Posterior class containing essential quantities of the posterior

View file

@ -8,7 +8,7 @@ import itertools
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
@ -79,10 +79,10 @@ class IndependentOutputs(CombinationKernel):
def update_gradients_full(self,dL_dK,X,X2=None):
slices = index_to_slices(X[:,self.index_dim])
if self.single_kern:
if self.single_kern:
target = np.zeros(self.kern.size)
kerns = itertools.repeat(self.kern)
else:
else:
kerns = self.kern
target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
def collate_grads(kern, i, dL, X, X2):
@ -94,7 +94,7 @@ class IndependentOutputs(CombinationKernel):
else:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
if self.single_kern:
if self.single_kern:
self.kern.gradient = target
else:
[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
@ -104,12 +104,14 @@ class IndependentOutputs(CombinationKernel):
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None:
# TODO: make use of index_to_slices
# FIXME: Broken as X is already sliced out
print "Warning, gradients_X may not be working, I believe X has already been sliced out by the slicer!"
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(kerns, slices)]
#slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(kerns, slices)]
#import ipdb;ipdb.set_trace()
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),

View file

@ -132,10 +132,8 @@ class Gaussian(Likelihood):
:returns: log likelihood evaluated for this point
:rtype: float
"""
N = y.shape[0]
ln_det_cov = np.log(self.variance)
return -0.5*((y-link_f)**2/self.variance + ln_det_cov + np.log(2.*np.pi))
return -(1.0/(2*self.variance))*((y-link_f)**2) - 0.5*ln_det_cov - 0.5*np.log(2.*np.pi)
def dlogpdf_dlink(self, link_f, y, Y_metadata=None):
"""
@ -220,7 +218,6 @@ class Gaussian(Likelihood):
"""
e = y - link_f
s_4 = 1.0/(self.variance**2)
N = y.shape[0]
dlik_dsigma = -0.5/self.variance + 0.5*s_4*np.square(e)
return dlik_dsigma

View file

@ -114,21 +114,24 @@ class Likelihood(Parameterized):
#Otherwise just pass along None's
zipped_values = zip(flat_y_test, flat_mu_star, flat_var_star, [None]*y_test.shape[0])
def integral_generator(y, m, v, y_m):
"""Generate a function which can be integrated to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(f_star):
def integral_generator(yi, mi, vi, yi_m):
"""Generate a function which can be integrated
to give p(Y*|Y) = int p(Y*|f*)p(f*|Y) df*"""
def f(fi_star):
#exponent = np.exp(-(1./(2*v))*np.square(m-f_star))
#from GPy.util.misc import safe_exp
#exponent = safe_exp(exponent)
#return self.pdf(f_star, y, y_m)*exponent
#More stable in the log space
return np.exp(self.logpdf(f_star, y, y_m) -(1./(2*v))*np.square(m-f_star))
return np.exp(self.logpdf(fi_star, yi, yi_m)
- 0.5*np.log(2*np.pi*vi)
- 0.5*np.square(mi-fi_star)/vi)
return f
scaled_p_ystar, accuracy = zip(*[quad(integral_generator(y, m, v, y_m), -np.inf, np.inf) for y, m, v, y_m in zipped_values])
scaled_p_ystar = np.array(scaled_p_ystar).reshape(-1,1)
p_ystar = scaled_p_ystar/np.sqrt(2*np.pi*var_star)
p_ystar, _ = zip(*[quad(integral_generator(yi, mi, vi, yi_m), -np.inf, np.inf)
for yi, mi, vi, yi_m in zipped_values])
p_ystar = np.array(p_ystar).reshape(-1, 1)
return np.log(p_ystar)
def _moments_match_ep(self,obs,tau,v):
@ -545,9 +548,9 @@ class Likelihood(Parameterized):
#Parameters are stacked vertically. Must be listed in same order as 'get_param_names'
# ensure we have gradients for every parameter we want to optimize
assert dlogpdf_dtheta.shape[0] == self.size #f, d x num_param array
assert dlogpdf_df_dtheta.shape[0] == self.size #f x d x num_param matrix or just f x num_param
assert d2logpdf_df2_dtheta.shape[0] == self.size #f x num_param matrix or f x d x num_param matrix, f x f x num_param or f x f x d x num_param
assert dlogpdf_dtheta.shape[0] == self.size #num_param array x f, d
assert dlogpdf_df_dtheta.shape[0] == self.size #num_param x f x d x matrix or just num_param x f
assert d2logpdf_df2_dtheta.shape[0] == self.size #num_param x f matrix or num_param x f x d x matrix, num_param x f x f or num_param x f x f x d
return dlogpdf_dtheta, dlogpdf_df_dtheta, d2logpdf_df2_dtheta

View file

@ -4,9 +4,7 @@
import numpy as np
from ..util.univariate_Gaussian import std_norm_cdf, std_norm_pdf
import scipy as sp
_exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)
from ..util.misc import safe_exp, safe_square, safe_cube, safe_quad, safe_three_times
class GPTransformation(object):
"""
@ -63,12 +61,13 @@ class Identity(GPTransformation):
def d3transf_df3(self,f):
return np.zeros_like(f)
class Probit(GPTransformation):
"""
.. math::
g(f) = \\Phi^{-1} (mu)
"""
def transf(self,f):
return std_norm_cdf(f)
@ -80,7 +79,7 @@ class Probit(GPTransformation):
return -f * std_norm_pdf(f)
def d3transf_df3(self,f):
return (np.square(f)-1.)*std_norm_pdf(f)
return (safe_square(f)-1.)*std_norm_pdf(f)
class Cloglog(GPTransformation):
@ -96,19 +95,23 @@ class Cloglog(GPTransformation):
"""
def transf(self,f):
return 1-np.exp(-np.exp(f))
ef = safe_exp(f)
return 1-np.exp(-ef)
def dtransf_df(self,f):
return np.exp(f-np.exp(f))
ef = safe_exp(f)
return np.exp(f-ef)
def d2transf_df2(self,f):
ef = np.exp(f)
ef = safe_exp(f)
return -np.exp(f-ef)*(ef-1.)
def d3transf_df3(self,f):
ef = np.exp(f)
return np.exp(f-ef)*(1.-3*ef + ef**2)
ef = safe_exp(f)
ef2 = safe_square(ef)
three_times_ef = safe_three_times(ef)
r_val = np.exp(f-ef)*(1.-three_times_ef + ef2)
return r_val
class Log(GPTransformation):
"""
@ -118,16 +121,16 @@ class Log(GPTransformation):
"""
def transf(self,f):
return np.exp(np.clip(f, -np.inf, _lim_val))
return safe_exp(f)
def dtransf_df(self,f):
return np.exp(np.clip(f, -np.inf, _lim_val))
return safe_exp(f)
def d2transf_df2(self,f):
return np.exp(np.clip(f, -np.inf, _lim_val))
return safe_exp(f)
def d3transf_df3(self,f):
return np.exp(np.clip(f, -np.inf, _lim_val))
return safe_exp(f)
class Log_ex_1(GPTransformation):
"""
@ -137,17 +140,20 @@ class Log_ex_1(GPTransformation):
"""
def transf(self,f):
return np.log(1.+np.exp(f))
return np.log1p(safe_exp(f))
def dtransf_df(self,f):
return np.exp(f)/(1.+np.exp(f))
ef = safe_exp(f)
return ef/(1.+ef)
def d2transf_df2(self,f):
aux = np.exp(f)/(1.+np.exp(f))
ef = safe_exp(f)
aux = ef/(1.+ef)
return aux*(1.-aux)
def d3transf_df3(self,f):
aux = np.exp(f)/(1.+np.exp(f))
ef = safe_exp(f)
aux = ef/(1.+ef)
daux_df = aux*(1.-aux)
return daux_df - (2.*aux*daux_df)
@ -155,14 +161,17 @@ class Reciprocal(GPTransformation):
def transf(self,f):
return 1./f
def dtransf_df(self,f):
return -1./(f**2)
def dtransf_df(self, f):
f2 = safe_square(f)
return -1./f2
def d2transf_df2(self,f):
return 2./(f**3)
def d2transf_df2(self, f):
f3 = safe_cube(f)
return 2./f3
def d3transf_df3(self,f):
return -6./(f**4)
f4 = safe_quad(f)
return -6./f4
class Heaviside(GPTransformation):
"""

View file

@ -11,7 +11,7 @@ from .sparse_gplvm import SparseGPLVM
from .warped_gp import WarpedGP
from .bayesian_gplvm import BayesianGPLVM
from .mrd import MRD
from .gradient_checker import GradientChecker
from .gradient_checker import GradientChecker, HessianChecker, SkewChecker
from .ss_gplvm import SSGPLVM
from .gp_coregionalized_regression import GPCoregionalizedRegression
from .sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression

View file

@ -24,7 +24,7 @@ class BayesianGPLVM(SparseGP_MPI):
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None,
name='bayesian gplvm', mpi_comm=None, normalizer=None,
missing_data=False, stochastic=False, batchsize=1):
missing_data=False, stochastic=False, batchsize=1, Y_metadata=None):
self.logger = logging.getLogger(self.__class__.__name__)
if X is None:
@ -69,6 +69,7 @@ class BayesianGPLVM(SparseGP_MPI):
name=name, inference_method=inference_method,
normalizer=normalizer, mpi_comm=mpi_comm,
variational_prior=self.variational_prior,
Y_metadata=Y_metadata
)
self.link_parameter(self.X, index=0)
@ -83,7 +84,7 @@ class BayesianGPLVM(SparseGP_MPI):
def parameters_changed(self):
super(BayesianGPLVM,self).parameters_changed()
if isinstance(self.inference_method, VarDTC_minibatch):
return
return
kl_fctr = 1.
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)

View file

@ -5,6 +5,8 @@ from ..core.model import Model
import itertools
import numpy
from ..core.parameterization import Param
np = numpy
from ..util.block_matrices import get_blocks, get_block_shapes, unblock, get_blocks_3d, get_block_shapes_3d
def get_shape(x):
if isinstance(x, numpy.ndarray):
@ -111,3 +113,261 @@ class GradientChecker(Model):
#for name, shape in zip(self.names, self.shapes):
#_param_names.extend(map(lambda nameshape: ('_'.join(nameshape)).strip('_'), itertools.izip(itertools.repeat(name), itertools.imap(lambda t: '_'.join(map(str, t)), itertools.product(*map(lambda xi: range(xi), shape))))))
#return _param_names
class HessianChecker(GradientChecker):
def __init__(self, f, df, ddf, x0, names=None, *args, **kwargs):
"""
:param f: Function (only used for numerical hessian gradient)
:param df: Gradient of function to check
:param ddf: Analytical gradient function
:param x0:
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
Can be a list of arrays, if takes a list of arrays. This list will be passed
to f and df in the same order as given here.
If only one argument, make sure not to pass a list!!!
:type x0: [array-like] | array-like | float | int
:param names:
Names to print, when performing gradcheck. If a list was passed to x0
a list of names with the same length is expected.
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
"""
super(HessianChecker, self).__init__(df, ddf, x0, names=names, *args, **kwargs)
self._f = f
self._df = df
self._ddf = ddf
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
"""
Overwrite checkgrad method to check whole block instead of looping through
Shows diagnostics using matshow instead
:param verbose: If True, print a "full" checking of each parameter
:type verbose: bool
:param step: The size of the step around which to linearise the objective
:type step: float (default 1e-6)
:param tolerance: the tolerance allowed (see note)
:type tolerance: float (default 1e-3)
Note:-
The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity.
"""
try:
import numdifftools as nd
except:
raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
if target_param:
raise NotImplementedError('Only basic functionality is provided with this gradchecker')
#Repeat for each parameter, not the nicest but shouldn't be many cases where there are many
#variables
current_index = 0
for name, shape in zip(self.names, self.shapes):
current_size = numpy.prod(shape)
x = self.optimizer_array.copy()
#x = self._get_params_transformed().copy()
x = x[current_index:current_index + current_size].reshape(shape)
# Check gradients
analytic_hess = self._ddf(x)
if analytic_hess.shape[1] == 1:
analytic_hess = numpy.diagflat(analytic_hess)
#From the docs:
#x0 : vector location
#at which to differentiate fun
#If x0 is an N x M array, then fun is assumed to be a function
#of N*M variables., thus we must have it flat, not (N,1), but just (N,)
#numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
numeric_hess_partial = nd.Jacobian(self._df, vectorized=False)
#numeric_hess_partial = nd.Derivative(self._df, vectorized=True)
numeric_hess = numeric_hess_partial(x)
check_passed = self.checkgrad_block(analytic_hess, numeric_hess, verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=plot)
current_index += current_size
return check_passed
def checkgrad_block(self, analytic_hess, numeric_hess, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False):
"""
Checkgrad a block matrix
"""
if analytic_hess.dtype is np.dtype('object'):
#Make numeric hessian also into a block matrix
real_size = get_block_shapes(analytic_hess)
num_elements = np.sum(real_size)
if (num_elements, num_elements) == numeric_hess.shape:
#If the sizes are the same we assume they are the same
#(we have not fixed any values so the numeric is the whole hessian)
numeric_hess = get_blocks(numeric_hess, real_size)
else:
#Make a fake empty matrix and fill out the correct block
tmp_numeric_hess = get_blocks(np.zeros((num_elements, num_elements)), real_size)
tmp_numeric_hess[block_indices] = numeric_hess.copy()
numeric_hess = tmp_numeric_hess
if block_indices is not None:
#Extract the right block
analytic_hess = analytic_hess[block_indices]
numeric_hess = numeric_hess[block_indices]
else:
#Unblock them if they are in blocks and you aren't checking a single block (checking whole hessian)
if analytic_hess.dtype is np.dtype('object'):
analytic_hess = unblock(analytic_hess)
numeric_hess = unblock(numeric_hess)
ratio = numeric_hess / (numpy.where(analytic_hess==0, 1e-10, analytic_hess))
difference = numpy.abs(analytic_hess - numeric_hess)
check_passed = numpy.all((numpy.abs(1 - ratio)) < tolerance) or numpy.allclose(numeric_hess, analytic_hess, atol = tolerance)
if verbose:
if block_indices:
print "\nBlock {}".format(block_indices)
else:
print "\nAll blocks"
header = ['Checked', 'Max-Ratio', 'Min-Ratio', 'Min-Difference', 'Max-Difference']
header_string = map(lambda x: ' | '.join(header), [header])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
min_r = '%.6f' % float(numpy.min(ratio))
max_r = '%.6f' % float(numpy.max(ratio))
max_d = '%.6f' % float(numpy.max(difference))
min_d = '%.6f' % float(numpy.min(difference))
cols = [max_r, min_r, min_d, max_d]
if check_passed:
checked = "\033[92m True \033[0m"
else:
checked = "\033[91m False \033[0m"
grad_string = "{} | {} | {} | {} | {} ".format(checked, cols[0], cols[1], cols[2], cols[3])
print grad_string
if plot:
import pylab as pb
fig, axes = pb.subplots(2, 2)
max_lim = numpy.max(numpy.vstack((analytic_hess, numeric_hess)))
min_lim = numpy.min(numpy.vstack((analytic_hess, numeric_hess)))
msa = axes[0,0].matshow(analytic_hess, vmin=min_lim, vmax=max_lim)
axes[0,0].set_title('Analytic hessian')
axes[0,0].xaxis.set_ticklabels([None])
axes[0,0].yaxis.set_ticklabels([None])
axes[0,0].xaxis.set_ticks([None])
axes[0,0].yaxis.set_ticks([None])
msn = axes[0,1].matshow(numeric_hess, vmin=min_lim, vmax=max_lim)
pb.colorbar(msn, ax=axes[0,1])
axes[0,1].set_title('Numeric hessian')
axes[0,1].xaxis.set_ticklabels([None])
axes[0,1].yaxis.set_ticklabels([None])
axes[0,1].xaxis.set_ticks([None])
axes[0,1].yaxis.set_ticks([None])
msr = axes[1,0].matshow(ratio)
pb.colorbar(msr, ax=axes[1,0])
axes[1,0].set_title('Ratio')
axes[1,0].xaxis.set_ticklabels([None])
axes[1,0].yaxis.set_ticklabels([None])
axes[1,0].xaxis.set_ticks([None])
axes[1,0].yaxis.set_ticks([None])
msd = axes[1,1].matshow(difference)
pb.colorbar(msd, ax=axes[1,1])
axes[1,1].set_title('difference')
axes[1,1].xaxis.set_ticklabels([None])
axes[1,1].yaxis.set_ticklabels([None])
axes[1,1].xaxis.set_ticks([None])
axes[1,1].yaxis.set_ticks([None])
if block_indices:
fig.suptitle("Block: {}".format(block_indices))
pb.show()
return check_passed
class SkewChecker(HessianChecker):
def __init__(self, df, ddf, dddf, x0, names=None, *args, **kwargs):
"""
:param df: gradient of function
:param ddf: Gradient of function to check (hessian)
:param dddf: Analytical gradient function (third derivative)
:param x0:
Initial guess for inputs x (if it has a shape (a,b) this will be reflected in the parameter names).
Can be a list of arrays, if takes a list of arrays. This list will be passed
to f and df in the same order as given here.
If only one argument, make sure not to pass a list!!!
:type x0: [array-like] | array-like | float | int
:param names:
Names to print, when performing gradcheck. If a list was passed to x0
a list of names with the same length is expected.
:param args: Arguments passed as f(x, *args, **kwargs) and df(x, *args, **kwargs)
"""
super(SkewChecker, self).__init__(df, ddf, dddf, x0, names=names, *args, **kwargs)
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, block_indices=None, plot=False, super_plot=False):
"""
Gradient checker that just checks each hessian individually
super_plot will plot the hessian wrt every parameter, plot will just do the first one
"""
try:
import numdifftools as nd
except:
raise ImportError("Don't have numdifftools package installed, it is not a GPy dependency as of yet, it is only used for hessian tests")
if target_param:
raise NotImplementedError('Only basic functionality is provided with this gradchecker')
#Repeat for each parameter, not the nicest but shouldn't be many cases where there are many
#variables
current_index = 0
for name, n_shape in zip(self.names, self.shapes):
current_size = numpy.prod(n_shape)
x = self.optimizer_array.copy()
#x = self._get_params_transformed().copy()
x = x[current_index:current_index + current_size].reshape(n_shape)
# Check gradients
#Actually the third derivative
analytic_hess = self._ddf(x)
#Can only calculate jacobian for one variable at a time
#From the docs:
#x0 : vector location
#at which to differentiate fun
#If x0 is an N x M array, then fun is assumed to be a function
#of N*M variables., thus we must have it flat, not (N,1), but just (N,)
#numeric_hess_partial = nd.Hessian(self._f, vectorized=False)
#Actually _df is already the hessian
numeric_hess_partial = nd.Jacobian(self._df, vectorized=True)
numeric_hess = numeric_hess_partial(x)
print "Done making numerical hessian"
if analytic_hess.dtype is np.dtype('object'):
#Blockify numeric_hess aswell
blocksizes, pagesizes = get_block_shapes_3d(analytic_hess)
#HACK
real_block_size = np.sum(blocksizes)
numeric_hess = numeric_hess.reshape(real_block_size, real_block_size, pagesizes)
#numeric_hess = get_blocks_3d(numeric_hess, blocksizes)#, pagesizes)
else:
numeric_hess = numeric_hess.reshape(*analytic_hess.shape)
#Check every block individually (for ease)
check_passed = [False]*numeric_hess.shape[2]
for block_ind in xrange(numeric_hess.shape[2]):
#Unless super_plot is set, just plot the first one
p = True if (plot and block_ind == numeric_hess.shape[2]-1) or super_plot else False
if verbose:
print "Checking derivative of hessian wrt parameter number {}".format(block_ind)
check_passed[block_ind] = self.checkgrad_block(analytic_hess[:,:,block_ind], numeric_hess[:,:,block_ind], verbose=verbose, step=step, tolerance=tolerance, block_indices=block_indices, plot=p)
current_index += current_size
return np.all(check_passed)

View file

@ -1,4 +1,4 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Copyright (c) 2012-2015, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
@ -16,7 +16,8 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx'):
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'], Y_metadata=None, data_symbol='kx',
apply_link=False, samples_f=0, plot_uncertain_inputs=True):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
@ -38,7 +39,7 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:param samples: the number of a posteriori samples to plot p(y*|y)
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
@ -49,6 +50,10 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
:type linecol:
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:param apply_link: apply the link function if plotting f (default false)
:type apply_link: boolean
:param samples_f: the number of posteriori f samples to plot p(f*|y)
:type samples_f: int
"""
#deal with optional arguments
if which_data_rows == 'all':
@ -88,8 +93,14 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
#make a prediction on the frame and plot it
if plot_raw:
m, v = model._raw_predict(Xgrid)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
if apply_link:
lower = model.likelihood.gp_link.transf(m - 2*np.sqrt(v))
upper = model.likelihood.gp_link.transf(m + 2*np.sqrt(v))
#Once transformed this is now the median of the function
m = model.likelihood.gp_link.transf(m)
else:
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
else:
if isinstance(model,GPCoregionalizedRegression) or isinstance(model,SparseGPCoregionalizedRegression):
meta = {'output_index': Xgrid[:,-1:].astype(np.int)}
@ -110,13 +121,31 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
plots['posterior_samples'] = ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
if samples_f: #NOTE not tested with fixed_inputs
Fsim = model.posterior_samples_f(Xgrid, samples_f)
for fi in Fsim.T:
plots['posterior_samples_f'] = ax.plot(Xnew, fi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
#add error bars for uncertain (if input uncertainty is being modelled)
if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs():
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
if hasattr(model,"has_uncertain_inputs") and model.has_uncertain_inputs() and plot_uncertain_inputs:
if plot_raw:
#add error bars for uncertain (if input uncertainty is being modelled), for plot_f
#Hack to plot error bars on latent function, rather than on the data
vs = model.X.mean.values.copy()
for i,v in fixed_inputs:
vs[:,i] = v
m_X, _ = model._raw_predict(vs)
if apply_link:
m_X = model.likelihood.gp_link.transf(m_X)
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), m_X[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
else:
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
@ -186,3 +215,29 @@ def plot_fit_f(model, *args, **kwargs):
"""
kwargs['plot_raw'] = True
plot_fit(model,*args, **kwargs)
def fixed_inputs(model, non_fixed_inputs, fix_routine='median'):
"""
Convenience function for returning back fixed_inputs where the other inputs
are fixed using fix_routine
:param model: model
:type model: Model
:param non_fixed_inputs: dimensions of non fixed inputs
:type non_fixed_inputs: list
:param fix_routine: fixing routine to use, 'mean', 'median', 'zero'
:type fix_routine: string
"""
f_inputs = []
if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean.values.copy()
else:
X = model.X.values.copy()
for i in range(X.shape[1]):
if i not in non_fixed_inputs:
if fix_routine == 'mean':
f_inputs.append( (i, np.mean(X[:,i])) )
if fix_routine == 'median':
f_inputs.append( (i, np.median(X[:,i])) )
elif fix_routine == 'zero':
f_inputs.append( (i, 0) )
return f_inputs

View file

@ -0,0 +1,143 @@
import numpy as np
import scipy as sp
from scipy.special import cbrt
from GPy.models import GradientChecker
_lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val)
_lim_val_square = np.sqrt(_lim_val)
_lim_val_cube = cbrt(_lim_val)
from GPy.likelihoods.link_functions import Identity, Probit, Cloglog, Log, Log_ex_1, Reciprocal, Heaviside
class LinkFunctionTests(np.testing.TestCase):
def setUp(self):
self.small_f = np.array([[-1e-4]])
self.zero_f = np.array([[1e-4]])
self.mid_f = np.array([[5.0]])
self.large_f = np.array([[1e4]])
self.f_lower_lim = np.array(-np.inf)
self.f_upper_lim = np.array(np.inf)
def check_gradient(self, link_func, lim_of_inf, test_lim=False):
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.mid_f)
self.assertTrue(grad.checkgrad(verbose=True))
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.mid_f)
self.assertTrue(grad2.checkgrad(verbose=True))
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.mid_f)
self.assertTrue(grad3.checkgrad(verbose=True))
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.small_f)
self.assertTrue(grad.checkgrad(verbose=True))
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.small_f)
self.assertTrue(grad2.checkgrad(verbose=True))
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.small_f)
self.assertTrue(grad3.checkgrad(verbose=True))
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=self.zero_f)
self.assertTrue(grad.checkgrad(verbose=True))
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=self.zero_f)
self.assertTrue(grad2.checkgrad(verbose=True))
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=self.zero_f)
self.assertTrue(grad3.checkgrad(verbose=True))
#Do a limit test if the large f value is too large
large_f = np.clip(self.large_f, -np.inf, lim_of_inf-1e-3)
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=large_f)
self.assertTrue(grad.checkgrad(verbose=True))
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=large_f)
self.assertTrue(grad2.checkgrad(verbose=True))
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=large_f)
self.assertTrue(grad3.checkgrad(verbose=True))
if test_lim:
print "Testing limits"
#Remove some otherwise we are too close to the limit for gradcheck to work effectively
lim_of_inf = lim_of_inf - 1e-4
grad = GradientChecker(link_func.transf, link_func.dtransf_df, x0=lim_of_inf)
self.assertTrue(grad.checkgrad(verbose=True))
grad2 = GradientChecker(link_func.dtransf_df, link_func.d2transf_df2, x0=lim_of_inf)
self.assertTrue(grad2.checkgrad(verbose=True))
grad3 = GradientChecker(link_func.d2transf_df2, link_func.d3transf_df3, x0=lim_of_inf)
self.assertTrue(grad3.checkgrad(verbose=True))
def check_overflow(self, link_func, lim_of_inf):
#Check that it does something sensible beyond this limit,
#note this is not checking the value is correct, just that it isn't nan
beyond_lim_of_inf = lim_of_inf + 100.0
self.assertFalse(np.isinf(link_func.transf(beyond_lim_of_inf)))
self.assertFalse(np.isinf(link_func.dtransf_df(beyond_lim_of_inf)))
self.assertFalse(np.isinf(link_func.d2transf_df2(beyond_lim_of_inf)))
self.assertFalse(np.isnan(link_func.transf(beyond_lim_of_inf)))
self.assertFalse(np.isnan(link_func.dtransf_df(beyond_lim_of_inf)))
self.assertFalse(np.isnan(link_func.d2transf_df2(beyond_lim_of_inf)))
def test_log_overflow(self):
link = Log()
lim_of_inf = _lim_val_exp
np.testing.assert_almost_equal(np.exp(self.mid_f), link.transf(self.mid_f))
assert np.isinf(np.exp(np.log(self.f_upper_lim)))
#Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), _lim_val, significant=5)
self.check_overflow(link, lim_of_inf)
#Check that it would otherwise fail
beyond_lim_of_inf = lim_of_inf + 10.0
old_err_state = np.seterr(over='ignore')
self.assertTrue(np.isinf(np.exp(beyond_lim_of_inf)))
np.seterr(**old_err_state)
def test_log_ex_1_overflow(self):
link = Log_ex_1()
lim_of_inf = _lim_val_exp
np.testing.assert_almost_equal(np.log1p(np.exp(self.mid_f)), link.transf(self.mid_f))
assert np.isinf(np.log1p(np.exp(np.log(self.f_upper_lim))))
#Check the clipping works
np.testing.assert_almost_equal(link.transf(self.f_lower_lim), 0, decimal=5)
#Need to look at most significant figures here rather than the decimals
np.testing.assert_approx_equal(link.transf(self.f_upper_lim), np.log1p(_lim_val), significant=5)
self.check_overflow(link, lim_of_inf)
#Check that it would otherwise fail
beyond_lim_of_inf = lim_of_inf + 10.0
old_err_state = np.seterr(over='ignore')
self.assertTrue(np.isinf(np.log1p(np.exp(beyond_lim_of_inf))))
np.seterr(**old_err_state)
def test_log_gradients(self):
# transf dtransf_df d2transf_df2 d3transf_df3
link = Log()
lim_of_inf = _lim_val_exp
self.check_gradient(link, lim_of_inf, test_lim=True)
def test_identity_gradients(self):
link = Identity()
lim_of_inf = _lim_val
#FIXME: Should be able to think of a way to test the limits of this
self.check_gradient(link, lim_of_inf, test_lim=False)
def test_probit_gradients(self):
link = Probit()
lim_of_inf = _lim_val
self.check_gradient(link, lim_of_inf, test_lim=True)
def test_Cloglog_gradients(self):
link = Cloglog()
lim_of_inf = _lim_val_exp
self.check_gradient(link, lim_of_inf, test_lim=True)
def test_Log_ex_1_gradients(self):
link = Log_ex_1()
lim_of_inf = _lim_val_exp
self.check_gradient(link, lim_of_inf, test_lim=True)
self.check_overflow(link, lim_of_inf)
def test_reciprocal_gradients(self):
link = Reciprocal()
lim_of_inf = _lim_val
#Does not work with much smaller values, and values closer to zero than 1e-5
self.check_gradient(link, lim_of_inf, test_lim=True)

View file

@ -1,9 +1,37 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Copyright (c) 2014-2015, Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
def get_blocks_3d(A, blocksizes, pagesizes=None):
"""
Given a 3d matrix, make a block matrix, where the first and second dimensions are blocked according
to blocksizes, and the pages are blocked using pagesizes
"""
assert (A.shape[0]==A.shape[1]) and len(A.shape)==3, "can't blockify this non-square matrix, may need to use 2d version"
N = np.sum(blocksizes)
assert A.shape[0] == N, "bad blocksizes"
num_blocks = len(blocksizes)
if pagesizes == None:
#Assume each page of A should be its own dimension
pagesizes = range(A.shape[2])#[0]*A.shape[2]
num_pages = len(pagesizes)
B = np.empty(shape=(num_blocks, num_blocks, num_pages), dtype=np.object)
count_k = 0
#for Bk, k in enumerate(pagesizes):
for Bk in pagesizes:
count_i = 0
for Bi, i in enumerate(blocksizes):
count_j = 0
for Bj, j in enumerate(blocksizes):
#We want to have it count_k:count_k + k but its annoying as it makes a NxNx1 array is page sizes are set to 1
B[Bi, Bj, Bk] = A[count_i:count_i + i, count_j:count_j + j, Bk]
count_j += j
count_i += i
#count_k += k
return B
def get_blocks(A, blocksizes):
assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can;t blockify this non-square matrix"
assert (A.shape[0]==A.shape[1]) and len(A.shape)==2, "can't blockify this non-square matrix"
N = np.sum(blocksizes)
assert A.shape[0] == N, "bad blocksizes"
num_blocks = len(blocksizes)
@ -17,6 +45,11 @@ def get_blocks(A, blocksizes):
count_i += i
return B
def get_block_shapes_3d(B):
assert B.dtype is np.dtype('object'), "Must be a block matrix"
#FIXME: This isn't general AT ALL...
return get_block_shapes(B[:,:,0]), B.shape[2]
def get_block_shapes(B):
assert B.dtype is np.dtype('object'), "Must be a block matrix"
return [B[b,b].shape[0] for b in range(0, B.shape[0])]
@ -35,7 +68,7 @@ def unblock(B):
count_i += i
return A
def block_dot(A, B):
def block_dot(A, B, diagonal=False):
"""
Element wise dot product on block matricies
@ -48,21 +81,30 @@ def block_dot(A, B):
+-------------+ +------+------+ +-------+-------+
..Note
If any block of either (A or B) are stored as 1d vectors then we assume
that it denotes a diagonal matrix efficient dot product using numpy
broadcasting will be used, i.e. A11*B11
If either (A or B) of the diagonal matrices are stored as vectors then a more
efficient dot product using numpy broadcasting will be used, i.e. A11*B11
"""
#Must have same number of blocks and be a block matrix
assert A.dtype is np.dtype('object'), "Must be a block matrix"
assert B.dtype is np.dtype('object'), "Must be a block matrix"
Ashape = A.shape
Bshape = B.shape
assert Ashape == Bshape
def f(A,B):
if Ashape[0] == Ashape[1] or Bshape[0] == Bshape[1]:
#FIXME: Careful if one is transpose of other, would make a matrix
return A*B
assert A.shape == B.shape
def f(C,D):
"""
C is an element of A, D is the associated element of B
"""
Cshape = C.shape
Dshape = D.shape
if diagonal and (len(Cshape) == 1 or len(Dshape) == 1\
or C.shape[0] != C.shape[1] or D.shape[0] != D.shape[1]):
print "Broadcasting, C: {} D:{}".format(C.shape, D.shape)
return C*D
else:
return np.dot(A,B)
print "Dotting, C: {} C:{}".format(C.shape, D.shape)
return np.dot(C,D)
dot = np.vectorize(f, otypes = [np.object])
return dot(A,B)

View file

@ -20,7 +20,7 @@ try:
from scipy import weave
except ImportError:
config.set('weave', 'working', 'False')
_scipyversion = np.float64((scipy.__version__).split('.')[:2])
_fix_dpotri_scipy_bug = True
@ -102,7 +102,6 @@ def jitchol(A, maxtries=5):
num_tries = 1
while num_tries <= maxtries and np.isfinite(jitter):
try:
print(jitter)
L = linalg.cholesky(A + np.eye(A.shape[0]) * jitter, lower=True)
return L
except:
@ -115,7 +114,6 @@ def jitchol(A, maxtries=5):
except:
logging.warning('\n'.join(['Added jitter of {:.10e}'.format(jitter),
' in '+traceback.format_list(traceback.extract_stack(limit=2)[-2:-1])[0][2:]]))
import ipdb;ipdb.set_trace()
return L
# def dtrtri(L, lower=1):

View file

@ -2,18 +2,37 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy.special import cbrt
from .config import *
_lim_val = np.finfo(np.float64).max
_lim_val_exp = np.log(_lim_val)
_lim_val_square = np.sqrt(_lim_val)
_lim_val_cube = np.power(_lim_val, -3)
#_lim_val_cube = cbrt(_lim_val)
_lim_val_cube = np.nextafter(_lim_val**(1/3.0), -np.inf)
_lim_val_quad = np.nextafter(_lim_val**(1/4.0), -np.inf)
_lim_val_three_times = np.nextafter(_lim_val/3.0, -np.inf)
def safe_exp(f):
clip_f = np.clip(f, -np.inf, _lim_val_exp)
return np.exp(clip_f)
def safe_square(f):
f = np.clip(f, -np.inf, _lim_val_square)
return f**2
def safe_cube(f):
f = np.clip(f, -np.inf, _lim_val_cube)
return f**3
def safe_quad(f):
f = np.clip(f, -np.inf, _lim_val_quad)
return f**4
def safe_three_times(f):
f = np.clip(f, -np.inf, _lim_val_three_times)
return 3*f
def chain_1(df_dg, dg_dx):
"""
Generic chaining function for first derivative
@ -39,8 +58,8 @@ def chain_2(d2f_dg2, dg_dx, df_dg, d2g_dx2):
return d2f_dg2
if len(d2f_dg2) > 1 and len(d2f_dg2.shape)>1 and d2f_dg2.shape[-1] > 1:
raise NotImplementedError('Not implemented for matricies yet')
#dg_dx_2 = np.clip(dg_dx, 1e-12, _lim_val_square)**2
dg_dx_2 = dg_dx**2
dg_dx_2 = np.clip(dg_dx, -np.inf, _lim_val_square)**2
#dg_dx_2 = dg_dx**2
return d2f_dg2*(dg_dx_2) + df_dg*d2g_dx2
def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
@ -55,8 +74,8 @@ def chain_3(d3f_dg3, dg_dx, d2f_dg2, d2g_dx2, df_dg, d3g_dx3):
if ( (len(d2f_dg2) > 1 and d2f_dg2.shape[-1] > 1)
or (len(d3f_dg3) > 1 and d3f_dg3.shape[-1] > 1)):
raise NotImplementedError('Not implemented for matricies yet')
#dg_dx_3 = np.clip(dg_dx, 1e-12, _lim_val_cube)**3
dg_dx_3 = dg_dx**3
dg_dx_3 = np.clip(dg_dx, -np.inf, _lim_val_cube)**3
#dg_dx_3 = dg_dx**3
return d3f_dg3*(dg_dx_3) + 3*d2f_dg2*dg_dx*d2g_dx2 + df_dg*d3g_dx3
def opt_wrapper(m, **kwargs):