Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Ricardo 2015-09-02 19:40:45 +01:00
commit 06f540584b
16 changed files with 411 additions and 159 deletions

View file

@ -106,6 +106,13 @@ class GP(Model):
self.link_parameter(self.likelihood) self.link_parameter(self.likelihood)
self.posterior = None self.posterior = None
# The predictive variable to be used to predict using the posterior object's
# woodbury_vector and woodbury_inv is defined as predictive_variable
# This is usually just a link to self.X (full GP) or self.Z (sparse GP).
# Make sure to name this variable and the predict functions will "just work"
# as long as the posterior has the right woodbury entries.
self._predictive_variable = self.X
def set_XY(self, X=None, Y=None, trigger_update=True): def set_XY(self, X=None, Y=None, trigger_update=True):
""" """
@ -209,6 +216,7 @@ class GP(Model):
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx)) var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3: elif self.posterior.woodbury_inv.ndim == 3:
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2])) var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
from ..util.linalg import mdot
for i in range(var.shape[2]): for i in range(var.shape[2]):
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx)) var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
var = var var = var
@ -304,6 +312,104 @@ class GP(Model):
return dmu_dX, dv_dX return dmu_dX, dv_dX
def predict_jacobian(self, Xnew, kern=None, full_cov=True):
"""
Compute the derivatives of the posterior of the GP.
Given a set of points at which to predict X* (size [N*,Q]), compute the
mean and variance of the derivative. Resulting arrays are sized:
dL_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
Note that this is the mean and variance of the derivative,
not the derivative of the mean and variance! (See predictive_gradients for that)
dv_dX* -- [N*, Q], (since all outputs have the same variance)
If there is missing data, it is not implemented for now, but
there will be one output variance per output dimension.
:param X: The points at which to get the predictive gradients.
:type X: np.ndarray (Xnew x self.input_dim)
:param kern: The kernel to compute the jacobian for.
:param boolean full_cov: whether to return the full covariance of the jacobian.
:returns: dmu_dX, dv_dX
:rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q,(D)) ]
Note: We always return sum in input_dim gradients, as the off-diagonals
in the input_dim are not needed for further calculations.
This is a compromise for increase in speed. Mathematically the jacobian would
have another dimension in Q.
"""
if kern is None:
kern = self.kern
mean_jac = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
for i in range(self.output_dim):
mean_jac[:,:,i] = kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self._predictive_variable)
dK_dXnew_full = np.empty((self._predictive_variable.shape[0], Xnew.shape[0], Xnew.shape[1]))
for i in range(self._predictive_variable.shape[0]):
dK_dXnew_full[i] = kern.gradients_X([[1.]], Xnew, self._predictive_variable[[i]])
def compute_cov_inner(wi):
if full_cov:
# full covariance gradients:
dK2_dXdX = kern.gradients_XX([[1.]], Xnew)
var_jac = dK2_dXdX - np.einsum('qnm,miq->niq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
else:
dK2_dXdX = kern.gradients_XX_diag([[1.]], Xnew)
var_jac = dK2_dXdX - np.einsum('qim,miq->iq', dK_dXnew_full.T.dot(wi), dK_dXnew_full)
return var_jac
if self.posterior.woodbury_inv.ndim == 3:
var_jac = []
for d in range(self.posterior.woodbury_inv.shape[2]):
var_jac.append(compute_cov_inner(self.posterior.woodbury_inv[:, :, d]))
var_jac = np.concatenate(var_jac)
else:
var_jac = compute_cov_inner(self.posterior.woodbury_inv)
return mean_jac, var_jac
def predict_wishard_embedding(self, Xnew, kern=None):
"""
Predict the wishard embedding G of the GP. This is the density of the
input of the GP defined by the probabilistic function mapping f.
G = J_mean.T*J_mean + output_dim*J_cov.
:param array-like Xnew: The points at which to evaluate the magnification.
:param :py:class:`~GPy.kern.Kern` kern: The kernel to use for the magnification.
Supplying only a part of the learning kernel gives insights into the density
of the specific kernel part of the input function. E.g. one can see how dense the
linear part of a kernel is compared to the non-linear part etc.
"""
if kern is None:
kern = self.kern
mu_jac, var_jac = self.predict_jacobian(Xnew, kern, full_cov=False)
mumuT = np.einsum('iqd,ipd->iqp', mu_jac, mu_jac)
if var_jac.ndim == 3:
Sigma = np.einsum('iqd,ipd->iqp', var_jac, var_jac)
G = mumuT + Sigma
else:
Sigma = np.einsum('iq,ip->iqp', var_jac, var_jac)
G = mumuT + self.output_dim*Sigma
return G
def predict_magnification(self, Xnew, kern=None):
"""
Predict the magnification factor as
sqrt(det(G))
for each point N in Xnew
"""
G = self.predict_wishard_embedding(Xnew, kern)
from ..util.linalg import jitchol
return np.array([np.sqrt(np.exp(2*np.sum(np.log(np.diag(jitchol(G[n, :, :])))))) for n in range(Xnew.shape[0])])
#return np.array([np.sqrt(np.linalg.det(G[n, :, :])) for n in range(Xnew.shape[0])])
def posterior_samples_f(self,X,size=10, full_cov=True): def posterior_samples_f(self,X,size=10, full_cov=True):
""" """
Samples the posterior GP at the points X. Samples the posterior GP at the points X.
@ -539,6 +645,22 @@ class GP(Model):
predict_kw, plot_training_data, **kw) predict_kw, plot_training_data, **kw)
def plot_magnification(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, legend=True,
plot_limits=None,
aspect='auto', updates=False, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_magnification(self, labels, which_indices,
resolution, ax, marker, s,
fignum, False, legend,
plot_limits, aspect, updates, **kwargs)
def input_sensitivity(self, summarize=True): def input_sensitivity(self, summarize=True):
""" """
Returns the sensitivity for each dimension of this model Returns the sensitivity for each dimension of this model

View file

@ -32,7 +32,7 @@ class Bijective_mapping(Mapping):
also back from f to X. The inverse mapping is called g(). also back from f to X. The inverse mapping is called g().
""" """
def __init__(self, input_dim, output_dim, name='bijective_mapping'): def __init__(self, input_dim, output_dim, name='bijective_mapping'):
super(Bijective_apping, self).__init__(name=name) super(Bijective_mapping, self).__init__(name=name)
def g(self, f): def g(self, f):
"""Inverse mapping from output domain of the function to the inputs.""" """Inverse mapping from output domain of the function to the inputs."""

View file

@ -59,6 +59,8 @@ class SparseGP(GP):
logger.info("Adding Z as parameter") logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0) self.link_parameter(self.Z, index=0)
self.posterior = None self.posterior = None
self._predictive_variable = self.Z
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)
@ -114,10 +116,10 @@ class SparseGP(GP):
Make a prediction for the latent function values. Make a prediction for the latent function values.
For certain inputs we give back a full_cov of shape NxN, For certain inputs we give back a full_cov of shape NxN,
if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of, if there is missing data, each dimension has its own full_cov of shape NxNxD, and if full_cov is of,
we take only the diagonal elements across N. we take only the diagonal elements across N.
For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we For uncertain inputs, the SparseGP bound produces a full covariance structure across D, so for full_cov we
return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD). return a NxDxD matrix and in the not full_cov case, we return the diagonal elements across D (NxD).
This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'. This is for both with and without missing data. See for missing data SparseGP implementation py:class:'~GPy.models.sparse_gp_minibatch.SparseGPMiniBatch'.
""" """
@ -125,7 +127,7 @@ class SparseGP(GP):
if kern is None: kern = self.kern if kern is None: kern = self.kern
if not isinstance(Xnew, VariationalPosterior): if not isinstance(Xnew, VariationalPosterior):
Kx = kern.K(self.Z, Xnew) Kx = kern.K(self._predictive_variable, Xnew)
mu = np.dot(Kx.T, self.posterior.woodbury_vector) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
if full_cov: if full_cov:
Kxx = kern.K(Xnew) Kxx = kern.K(Xnew)
@ -149,28 +151,28 @@ class SparseGP(GP):
if self.mean_function is not None: if self.mean_function is not None:
mu += self.mean_function.f(Xnew) mu += self.mean_function.f(Xnew)
else: else:
psi0_star = kern.psi0(self.Z, Xnew) psi0_star = kern.psi0(self._predictive_variable, Xnew)
psi1_star = kern.psi1(self.Z, Xnew) psi1_star = kern.psi1(self._predictive_variable, Xnew)
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code. #psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
la = self.posterior.woodbury_vector la = self.posterior.woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions? mu = np.dot(psi1_star, la) # TODO: dimensions?
if full_cov: if full_cov:
var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1])) var = np.empty((Xnew.shape[0], la.shape[1], la.shape[1]))
di = np.diag_indices(la.shape[1]) di = np.diag_indices(la.shape[1])
else: else:
var = np.empty((Xnew.shape[0], la.shape[1])) var = np.empty((Xnew.shape[0], la.shape[1]))
for i in range(Xnew.shape[0]): for i in range(Xnew.shape[0]):
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]] _mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var)) psi2_star = kern.psi2(self._predictive_variable, NormalPosterior(_mu, _var))
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]])) tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
var_ = mdot(la.T, tmp, la) var_ = mdot(la.T, tmp, la)
p0 = psi0_star[i] p0 = psi0_star[i]
t = np.atleast_3d(self.posterior.woodbury_inv) t = np.atleast_3d(self.posterior.woodbury_inv)
t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2) t2 = np.trace(t.T.dot(psi2_star), axis1=1, axis2=2)
if full_cov: if full_cov:
var_[di] += p0 var_[di] += p0
var_[di] += -t2 var_[di] += -t2
@ -179,3 +181,18 @@ class SparseGP(GP):
var[i] = np.diag(var_)+p0-t2 var[i] = np.diag(var_)+p0-t2
return mu, var return mu, var
def plot_magnification(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, legend=True,
plot_limits=None,
aspect='auto', updates=False, plot_inducing=True, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_magnification(self, labels, which_indices,
resolution, ax, marker, s,
fignum, plot_inducing, legend,
plot_limits, aspect, updates, **kwargs)

View file

@ -24,7 +24,6 @@ class VerboseOptimization(object):
self.model.add_observer(self, self.print_status) self.model.add_observer(self, self.print_status)
self.status = 'running' self.status = 'running'
self.clear = clear_after_finish self.clear = clear_after_finish
self.deltat = .2
self.update() self.update()
@ -80,6 +79,7 @@ class VerboseOptimization(object):
def __enter__(self): def __enter__(self):
self.start = time.time() self.start = time.time()
self._time = self.start
return self return self
def print_out(self, seconds): def print_out(self, seconds):
@ -143,12 +143,12 @@ class VerboseOptimization(object):
def print_status(self, me, which=None): def print_status(self, me, which=None):
self.update() self.update()
seconds = time.time()-self.start t = time.time()
seconds = t-self.start
#sys.stdout.write(" "*len(self.message)) #sys.stdout.write(" "*len(self.message))
self.deltat += seconds if t-self._time > .3 or seconds < .3:
if self.deltat > .2:
self.print_out(seconds) self.print_out(seconds)
self.deltat = 0 self._time = t
self.iteration += 1 self.iteration += 1

View file

@ -11,7 +11,7 @@ class BasisFuncKernel(Kern):
def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'): def __init__(self, input_dim, variance=1., active_dims=None, ARD=False, name='basis func kernel'):
""" """
Abstract superclass for kernels with explicit basis functions for use in GPy. Abstract superclass for kernels with explicit basis functions for use in GPy.
This class does NOT automatically add an offset to the design matrix phi! This class does NOT automatically add an offset to the design matrix phi!
""" """
super(BasisFuncKernel, self).__init__(input_dim, active_dims, name) super(BasisFuncKernel, self).__init__(input_dim, active_dims, name)
@ -23,24 +23,24 @@ class BasisFuncKernel(Kern):
variance = np.array(variance) variance = np.array(variance)
self.variance = Param('variance', variance, Logexp()) self.variance = Param('variance', variance, Logexp())
self.link_parameter(self.variance) self.link_parameter(self.variance)
def parameters_changed(self): def parameters_changed(self):
self.alpha = np.sqrt(self.variance) self.alpha = np.sqrt(self.variance)
self.beta = 1./self.variance self.beta = 1./self.variance
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def phi(self, X): def phi(self, X):
return self._phi(X) return self._phi(X)
def _phi(self, X): def _phi(self, X):
raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi') raise NotImplementedError('Overwrite this _phi function, which maps the input X into the higher dimensional space and returns the design matrix Phi')
def K(self, X, X2=None): def K(self, X, X2=None):
return self._K(X, X2) return self._K(X, X2)
def Kdiag(self, X, X2=None): def Kdiag(self, X, X2=None):
return np.diag(self._K(X, X2)) return np.diag(self._K(X, X2))
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
if self.ARD: if self.ARD:
phi1 = self.phi(X) phi1 = self.phi(X)
@ -51,22 +51,22 @@ class BasisFuncKernel(Kern):
self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2) self.variance.gradient = np.einsum('ij,iq,jq->q', dL_dK, phi1, phi2)
else: else:
self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta self.variance.gradient = np.einsum('ij,ij', dL_dK, self._K(X, X2)) * self.beta
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
if self.ARD: if self.ARD:
phi1 = self.phi(X) phi1 = self.phi(X)
self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1) self.variance.gradient = np.einsum('i,iq,iq->q', dL_dKdiag, phi1, phi1)
else: else:
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.Kdiag(X)) * self.beta
def concatenate_offset(self, X): def concatenate_offset(self, X):
return np.c_[np.ones((X.shape[0], 1)), X] return np.c_[np.ones((X.shape[0], 1)), X]
def posterior_inf(self, X=None, posterior=None): def posterior_inf(self, X=None, posterior=None):
""" """
Do the posterior inference on the parameters given this kernels functions Do the posterior inference on the parameters given this kernels functions
and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model. and the model posterior, which has to be a GPy posterior, usually found at m.posterior, if m is a GPy model.
If not given we search for the the highest parent to be a model, containing the posterior, and for X accordingly. If not given we search for the the highest parent to be a model, containing the posterior, and for X accordingly.
""" """
if X is None: if X is None:
try: try:
@ -80,7 +80,7 @@ class BasisFuncKernel(Kern):
raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference") raise RuntimeError("This kernel is not part of a model and cannot be used for posterior inference")
phi_alpha = self.phi(X) * self.variance phi_alpha = self.phi(X) * self.variance
return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha)) return (phi_alpha).T.dot(posterior.woodbury_vector), (np.eye(phi_alpha.shape[1])*self.variance - mdot(phi_alpha.T, posterior.woodbury_inv, phi_alpha))
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _K(self, X, X2): def _K(self, X, X2):
if X2 is None or X is X2: if X2 is None or X is X2:
@ -95,35 +95,35 @@ class BasisFuncKernel(Kern):
phi1 = phi1[:, None] phi1 = phi1[:, None]
phi2 = phi2[:, None] phi2 = phi2[:, None]
return phi1.dot(phi2.T) return phi1.dot(phi2.T)
class LinearSlopeBasisFuncKernel(BasisFuncKernel): class LinearSlopeBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'): def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='linear_segment'):
""" """
A linear segment transformation. The segments start at start, \ A linear segment transformation. The segments start at start, \
are then linear to stop and constant again. The segments are are then linear to stop and constant again. The segments are
normalized, so that they have exactly as much mass above normalized, so that they have exactly as much mass above
as below the origin. as below the origin.
Start and stop can be tuples or lists of starts and stops. Start and stop can be tuples or lists of starts and stops.
Behaviour of start stop is as np.where(X<start) would do. Behaviour of start stop is as np.where(X<start) would do.
""" """
self.start = np.array(start) self.start = np.array(start)
self.stop = np.array(stop) self.stop = np.array(stop)
super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name) super(LinearSlopeBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _phi(self, X): def _phi(self, X):
phi = np.where(X < self.start, self.start, X) phi = np.where(X < self.start, self.start, X)
phi = np.where(phi > self.stop, self.stop, phi) phi = np.where(phi > self.stop, self.stop, phi)
return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1. return ((phi-(self.stop+self.start)/2.))#/(.5*(self.stop-self.start)))-1.
class ChangePointBasisFuncKernel(BasisFuncKernel): class ChangePointBasisFuncKernel(BasisFuncKernel):
def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'): def __init__(self, input_dim, changepoint, variance=1., active_dims=None, ARD=False, name='changepoint'):
self.changepoint = np.array(changepoint) self.changepoint = np.array(changepoint)
super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name) super(ChangePointBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _phi(self, X): def _phi(self, X):
return np.where((X < self.changepoint), -1, 1) return np.where((X < self.changepoint), -1, 1)
@ -131,7 +131,7 @@ class ChangePointBasisFuncKernel(BasisFuncKernel):
class DomainKernel(LinearSlopeBasisFuncKernel): class DomainKernel(LinearSlopeBasisFuncKernel):
def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'): def __init__(self, input_dim, start, stop, variance=1., active_dims=None, ARD=False, name='constant_domain'):
super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name) super(DomainKernel, self).__init__(input_dim, start, stop, variance, active_dims, ARD, name)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _phi(self, X): def _phi(self, X):
phi = np.where((X>self.start)*(X<self.stop), 1, 0) phi = np.where((X>self.start)*(X<self.stop), 1, 0)
@ -147,7 +147,7 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
self.slope = Param('slope', slope, Logexp()) self.slope = Param('slope', slope, Logexp())
super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name) super(LogisticBasisFuncKernel, self).__init__(input_dim, variance, active_dims, ARD, name)
self.link_parameter(self.slope) self.link_parameter(self.slope)
@Cache_this(limit=3, ignore_args=()) @Cache_this(limit=3, ignore_args=())
def _phi(self, X): def _phi(self, X):
import scipy as sp import scipy as sp
@ -156,7 +156,7 @@ class LogisticBasisFuncKernel(BasisFuncKernel):
def parameters_changed(self): def parameters_changed(self):
BasisFuncKernel.parameters_changed(self) BasisFuncKernel.parameters_changed(self)
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2) super(LogisticBasisFuncKernel, self).update_gradients_full(dL_dK, X, X2)
if X2 is None or X is X2: if X2 is None or X is X2:

View file

@ -59,7 +59,7 @@ class Kern(Parameterized):
self._sliced_X = 0 self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool) self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
from .psi_comp import PSICOMP_GH from .psi_comp import PSICOMP_GH
self.psicomp = PSICOMP_GH() self.psicomp = PSICOMP_GH()
@ -100,6 +100,10 @@ class Kern(Parameterized):
return self.psicomp.psicomputations(self, Z, variational_posterior)[2] return self.psicomp.psicomputations(self, Z, variational_posterior)[2]
def gradients_X(self, dL_dK, X, X2): def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError raise NotImplementedError
def gradients_XX(self, dL_dK, X, X2):
raise(NotImplementedError, "This is the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_XX_diag(self, dL_dK, X, X2):
raise(NotImplementedError, "This is the diagonal of the second derivative of K wrt X and X2, and not implemented for this kernel")
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError raise NotImplementedError

View file

@ -19,6 +19,8 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full) put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag) put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'gradients_X', _slice_gradients_X) put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_XX', _slice_gradients_XX)
put_clean(dct, 'gradients_XX_diag', _slice_gradients_X_diag)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag) put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'psi0', _slice_psi) put_clean(dct, 'psi0', _slice_psi)
@ -30,9 +32,12 @@ class KernCallsViaSlicerMeta(ParametersChangedMeta):
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct) return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
class _Slice_wrap(object): class _Slice_wrap(object):
def __init__(self, k, X, X2=None): def __init__(self, k, X, X2=None, ret_shape=None):
self.k = k self.k = k
self.shape = X.shape if ret_shape is None:
self.shape = X.shape
else:
self.shape = ret_shape
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape) assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
if X2 is not None: if X2 is not None:
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape) assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
@ -54,7 +59,10 @@ class _Slice_wrap(object):
def handle_return_array(self, return_val): def handle_return_array(self, return_val):
if self.ret: if self.ret:
ret = np.zeros(self.shape) ret = np.zeros(self.shape)
ret[:, self.k.active_dims] = return_val if len(self.shape) == 2:
ret[:, self.k.active_dims] = return_val
elif len(self.shape) == 3:
ret[:, :, self.k.active_dims] = return_val
return ret return ret
return return_val return return_val
@ -98,6 +106,19 @@ def _slice_gradients_X(f):
return ret return ret
return wrap return wrap
def _slice_gradients_XX(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
if X2 is None:
N, M = X.shape[0], X.shape[0]
else:
N, M = X.shape[0], X2.shape[0]
with _Slice_wrap(self, X, X2, ret_shape=(N, M, X.shape[1])) as s:
#with _Slice_wrap(self, X, X2, ret_shape=None) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f): def _slice_gradients_X_diag(f):
@wraps(f) @wraps(f)
def wrap(self, dL_dKdiag, X): def wrap(self, dL_dKdiag, X):

View file

@ -100,6 +100,12 @@ class Linear(Kern):
#return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) #return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK) return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
def gradients_XX(self, dL_dK, X, X2=None):
if X2 is None:
return 2*np.ones(X.shape)*self.variances
else:
return np.ones(X.shape)*self.variances
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X return 2.*self.variances*dL_dKdiag[:,None]*X

View file

@ -31,6 +31,9 @@ class RBF(Stationary):
def dK_dr(self, r): def dK_dr(self, r):
return -r*self.K_of_r(r) return -r*self.K_of_r(r)
def dK2_drdr(self, r):
return (r**2-1)*self.K_of_r(r)
def __getstate__(self): def __getstate__(self):
dc = super(RBF, self).__getstate__() dc = super(RBF, self).__getstate__()
if self.useGPU: if self.useGPU:

View file

@ -15,7 +15,7 @@ from ...util.caching import Cache_this
try: try:
from . import stationary_cython from . import stationary_cython
except ImportError: except ImportError:
print('warning in sationary: failed to import cython module: falling back to numpy') print('warning in stationary: failed to import cython module: falling back to numpy')
config.set('cython', 'working', 'false') config.set('cython', 'working', 'false')
@ -78,6 +78,10 @@ class Stationary(Kern):
raise NotImplementedError("implement derivative of the covariance function wrt r to use this class") raise NotImplementedError("implement derivative of the covariance function wrt r to use this class")
@Cache_this(limit=20, ignore_args=()) @Cache_this(limit=20, ignore_args=())
def dK2_drdr(self, r):
raise NotImplementedError("implement second derivative of covariance wrt r to use this method")
@Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None): def K(self, X, X2=None):
""" """
Kernel function applied on inputs X and X2. Kernel function applied on inputs X and X2.
@ -94,6 +98,11 @@ class Stationary(Kern):
#a convenience function, so we can cache dK_dr #a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2)) return self.dK_dr(self._scaled_dist(X, X2))
@Cache_this(limit=3, ignore_args=())
def dK2_drdr_via_X(self, X, X2):
#a convenience function, so we can cache dK_dr
return self.dK2_drdr(self._scaled_dist(X, X2))
def _unscaled_dist(self, X, X2=None): def _unscaled_dist(self, X, X2=None):
""" """
Compute the Euclidean distance between each row of X and X2, or between Compute the Euclidean distance between each row of X and X2, or between
@ -201,6 +210,59 @@ class Stationary(Kern):
else: else:
return self._gradients_X_pure(dL_dK, X, X2) return self._gradients_X_pure(dL_dK, X, X2)
def gradients_XX(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
..math:
\frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ] (X2 is X if, X2 is None)
Thus, we return the second derivative in X2.
"""
# The off diagonals in Q are always zero, this should also be true for the Linear kernel...
# According to multivariable chain rule, we can chain the second derivative through r:
# d2K_dXdX2 = dK_dr*d2r_dXdX2 + d2K_drdr * dr_dX * dr_dX2:
invdist = self._inv_dist(X, X2)
invdist2 = invdist**2
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp1 = dL_dr * invdist
dL_drdr = self.dK2_drdr_via_X(X, X2) * dL_dK
tmp2 = dL_drdr * invdist2
l2 = np.ones(X.shape[1]) * self.lengthscale**2
if X2 is None:
X2 = X
tmp1 -= np.eye(X.shape[0])*self.variance
else:
tmp1[X==X2.T] -= self.variance
grad = np.empty((X.shape[0], X2.shape[0], X.shape[1]), dtype=np.float64)
#grad = np.empty(X.shape, dtype=np.float64)
for q in range(self.input_dim):
tmpdist2 = (X[:,[q]]-X2[:,[q]].T) ** 2
grad[:, :, q] = ((tmp1*invdist2 - tmp2)*tmpdist2/l2[q] - tmp1)/l2[q]
#grad[:, :, q] = ((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q]
#np.sum(((tmp1*(((tmpdist2)*invdist2/l2[q])-1)) - (tmp2*(tmpdist2))/l2[q])/l2[q], axis=1, out=grad[:,q])
#np.sum( - (tmp2*(tmpdist**2)), axis=1, out=grad[:,q])
return grad
def gradients_XX_diag(self, dL_dK, X):
"""
Given the derivative of the objective K(dL_dK), compute the second derivative of K wrt X and X2:
..math:
\frac{\partial^2 K}{\partial X\partial X2}
..returns:
dL2_dXdX2: NxMxQ, for X [NxQ] and X2[MxQ]
"""
return np.ones(X.shape) * self.variance/self.lengthscale**2
def _gradients_X_pure(self, dL_dK, X, X2=None): def _gradients_X_pure(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2) invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK dL_dr = self.dK_dr_via_X(X, X2) * dL_dK

View file

@ -1,7 +1,6 @@
from .bernoulli import Bernoulli from .bernoulli import Bernoulli
from .exponential import Exponential from .exponential import Exponential
from .gaussian import Gaussian from .gaussian import Gaussian, HeteroscedasticGaussian
from .gaussian import HeteroscedasticGaussian
from .gamma import Gamma from .gamma import Gamma
from .poisson import Poisson from .poisson import Poisson
from .student_t import StudentT from .student_t import StudentT

View file

@ -137,6 +137,20 @@ class BayesianGPLVM(SparseGP_MPI):
fignum, plot_inducing, legend, fignum, plot_inducing, legend,
plot_limits, aspect, updates, predict_kwargs, imshow_kwargs) plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
def plot_magnification(self, labels=None, which_indices=None,
resolution=50, ax=None, marker='o', s=40,
fignum=None, legend=True,
plot_limits=None,
aspect='auto', updates=False, **kwargs):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_magnification(self, labels, which_indices,
resolution, ax, marker, s,
fignum, False, legend,
plot_limits, aspect, updates, **kwargs)
def do_test_latents(self, Y): def do_test_latents(self, Y):
""" """
Compute the latent representation for a set of new points Y Compute the latent representation for a set of new points Y

View file

@ -43,19 +43,19 @@ class GPLVM(GP):
super(GPLVM, self).parameters_changed() super(GPLVM, self).parameters_changed()
self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None) self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dK'], self.X, None)
def jacobian(self,X): #def jacobian(self,X):
J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) # J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
for i in range(self.output_dim): # for i in range(self.output_dim):
J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X) # J[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1], X, self.X)
return J # return J
def magnification(self,X): #def magnification(self,X):
target=np.zeros(X.shape[0]) # target=np.zeros(X.shape[0])
#J = np.zeros((X.shape[0],X.shape[1],self.output_dim)) # #J = np.zeros((X.shape[0],X.shape[1],self.output_dim))
J = self.jacobian(X) ## J = self.jacobian(X)
for i in range(X.shape[0]): # for i in range(X.shape[0]):
target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:])))) # target[i]=np.sqrt(np.linalg.det(np.dot(J[i,:,:],np.transpose(J[i,:,:]))))
return target # return target
def plot(self): def plot(self):
assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent" assert self.Y.shape[1] == 2, "too high dimensional to plot. Try plot_latent"
@ -81,6 +81,3 @@ class GPLVM(GP):
resolution, ax, marker, s, resolution, ax, marker, s,
fignum, False, legend, fignum, False, legend,
plot_limits, aspect, updates, **kwargs) plot_limits, aspect, updates, **kwargs)
def plot_magnification(self, *args, **kwargs):
return util.plot_latent.plot_magnification(self, *args, **kwargs)

View file

@ -44,7 +44,7 @@ class SparseGPMiniBatch(SparseGP):
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False, name='sparse gp', Y_metadata=None, normalizer=False,
missing_data=False, stochastic=False, batchsize=1): missing_data=False, stochastic=False, batchsize=1):
# pick a sensible inference method # pick a sensible inference method
if inference_method is None: if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian): if isinstance(likelihood, likelihoods.Gaussian):
@ -76,6 +76,7 @@ class SparseGPMiniBatch(SparseGP):
logger.info("Adding Z as parameter") logger.info("Adding Z as parameter")
self.link_parameter(self.Z, index=0) self.link_parameter(self.Z, index=0)
self.posterior = None self.posterior = None
self._predictive_variable = self.Z
def has_uncertain_inputs(self): def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior) return isinstance(self.X, VariationalPosterior)

View file

@ -114,7 +114,7 @@ def plot_latent(model, labels=None, which_indices=None,
# create a function which computes the shading of latent space according to the output variance # create a function which computes the shading of latent space according to the output variance
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], model.X.shape[1])) Xtest_full = np.zeros((x.shape[0], X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x Xtest_full[:, [input_1, input_2]] = x
_, var = model.predict(Xtest_full, **predict_kwargs) _, var = model.predict(Xtest_full, **predict_kwargs)
var = var[:, :1] var = var[:, :1]
@ -202,6 +202,7 @@ def plot_latent(model, labels=None, which_indices=None,
def plot_magnification(model, labels=None, which_indices=None, def plot_magnification(model, labels=None, which_indices=None,
resolution=60, ax=None, marker='o', s=40, resolution=60, ax=None, marker='o', s=40,
fignum=None, plot_inducing=False, legend=True, fignum=None, plot_inducing=False, legend=True,
plot_limits=None,
aspect='auto', updates=False): aspect='auto', updates=False):
""" """
:param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc) :param labels: a np.array of size model.num_data containing labels for the points (can be number, strings, etc)
@ -217,17 +218,88 @@ def plot_magnification(model, labels=None, which_indices=None,
input_1, input_2 = most_significant_input_dimensions(model, which_indices) input_1, input_2 = most_significant_input_dimensions(model, which_indices)
# first, plot the output variance as a function of the latent space #fethch the data points X that we'd like to plot
Xtest, xx, yy, xmin, xmax = x_frame2D(model.X[:, [input_1, input_2]], resolution=resolution) X = model.X
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) if isinstance(X, VariationalPosterior):
X = X.mean
else:
X = X
if X.shape[0] > 1000:
print("Warning: subsampling X, as it has more samples then 1000. X.shape={!s}".format(X.shape))
subsample = np.random.choice(X.shape[0], size=1000, replace=False)
X = X[subsample]
labels = labels[subsample]
#=======================================================================
# <<<WORK IN PROGRESS>>>
# <<<DO NOT DELETE>>>
# plt.close('all')
# fig, ax = plt.subplots(1,1)
# from GPy.plotting.matplot_dep.dim_reduction_plots import most_significant_input_dimensions
# import matplotlib.patches as mpatches
# i1, i2 = most_significant_input_dimensions(m, None)
# xmin, xmax = 100, -100
# ymin, ymax = 100, -100
# legend_handles = []
#
# X = m.X.mean[:, [i1, i2]]
# X = m.X.variance[:, [i1, i2]]
#
# xmin = X[:,0].min(); xmax = X[:,0].max()
# ymin = X[:,1].min(); ymax = X[:,1].max()
# range_ = [[xmin, xmax], [ymin, ymax]]
# ul = np.unique(labels)
#
# for i, l in enumerate(ul):
# #cdict = dict(red =[(0., colors[i][0], colors[i][0]), (1., colors[i][0], colors[i][0])],
# # green=[(0., colors[i][0], colors[i][1]), (1., colors[i][1], colors[i][1])],
# # blue =[(0., colors[i][0], colors[i][2]), (1., colors[i][2], colors[i][2])],
# # alpha=[(0., 0., .0), (.5, .5, .5), (1., .5, .5)])
# #cmap = LinearSegmentedColormap('{}'.format(l), cdict)
# cmap = LinearSegmentedColormap.from_list('cmap_{}'.format(str(l)), [colors[i], colors[i]], 255)
# cmap._init()
# #alphas = .5*(1+scipy.special.erf(np.linspace(-2,2, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
# alphas = (scipy.special.erf(np.linspace(0,2.4, cmap.N+3)))#np.log(np.linspace(np.exp(0), np.exp(1.), cmap.N+3))
# cmap._lut[:, -1] = alphas
# print l
# x, y = X[labels==l].T
#
# heatmap, xedges, yedges = np.histogram2d(x, y, bins=300, range=range_)
# #heatmap, xedges, yedges = np.histogram2d(x, y, bins=100)
#
# im = ax.imshow(heatmap, extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], cmap=cmap, aspect='auto', interpolation='nearest', label=str(l))
# legend_handles.append(mpatches.Patch(color=colors[i], label=l))
# ax.set_xlim(xmin, xmax)
# ax.set_ylim(ymin, ymax)
# plt.legend(legend_handles, [l.get_label() for l in legend_handles])
# plt.draw()
# plt.show()
#=======================================================================
#Create an IMshow controller that can re-plot the latent space shading at a good resolution
if plot_limits is None:
xmin, ymin = X[:, [input_1, input_2]].min(0)
xmax, ymax = X[:, [input_1, input_2]].max(0)
x_r, y_r = xmax-xmin, ymax-ymin
xmin -= .1*x_r
xmax += .1*x_r
ymin -= .1*y_r
ymax += .1*y_r
else:
try:
xmin, xmax, ymin, ymax = plot_limits
except (TypeError, ValueError) as e:
raise e.__class__("Wrong plot limits: {} given -> need (xmin, xmax, ymin, ymax)".format(plot_limits))
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x Xtest_full[:, [input_1, input_2]] = x
mf=model.magnification(Xtest_full) mf = model.predict_magnification(Xtest_full)
return mf return mf
view = ImshowController(ax, plot_function, view = ImshowController(ax, plot_function,
tuple(model.X.min(0)[:, [input_1, input_2]]) + tuple(model.X.max(0)[:, [input_1, input_2]]), (xmin, ymin, xmax, ymax),
resolution, aspect=aspect, interpolation='bilinear', resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.gray) cmap=pb.cm.gray)
@ -250,11 +322,11 @@ def plot_magnification(model, labels=None, which_indices=None,
index = np.nonzero(labels == ul)[0] index = np.nonzero(labels == ul)[0]
if model.input_dim == 1: if model.input_dim == 1:
x = model.X[index, input_1] x = X[index, input_1]
y = np.zeros(index.size) y = np.zeros(index.size)
else: else:
x = model.X[index, input_1] x = X[index, input_1]
y = model.X[index, input_2] y = X[index, input_2]
ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label) ax.scatter(x, y, marker=m, s=s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1) ax.set_xlabel('latent dimension %i' % input_1)
@ -263,13 +335,14 @@ def plot_magnification(model, labels=None, which_indices=None,
if not np.all(labels == 1.) and legend: if not np.all(labels == 1.) and legend:
ax.legend(loc=0, numpoints=1) ax.legend(loc=0, numpoints=1)
ax.set_xlim(xmin[0], xmax[0]) ax.set_xlim((xmin, xmax))
ax.set_ylim(xmin[1], xmax[1]) ax.set_ylim((ymin, ymax))
ax.grid(b=False) # remove the grid if present, it doesn't look good ax.grid(b=False) # remove the grid if present, it doesn't look good
ax.set_aspect('auto') # set a nice aspect ratio ax.set_aspect('auto') # set a nice aspect ratio
if plot_inducing: if plot_inducing:
ax.plot(model.Z[:, input_1], model.Z[:, input_2], '^w') Z = model.Z
ax.scatter(Z[:, input_1], Z[:, input_2], c='w', s=18, marker="^", edgecolor='k', linewidth=.3, alpha=.7)
if updates: if updates:
fig.canvas.show() fig.canvas.show()
@ -314,8 +387,8 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
this_label = 'class %i' % i this_label = 'class %i' % i
m = marker.next() m = marker.next()
index = np.nonzero(data_labels == ul)[0] index = np.nonzero(data_labels == ul)[0]
x = model.X[index, input_1] x = X[index, input_1]
y = model.X[index, input_2] y = X[index, input_2]
ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label) ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1) ax.set_xlabel('latent dimension %i' % input_1)
@ -323,7 +396,7 @@ def plot_steepest_gradient_map(model, fignum=None, ax=None, which_indices=None,
controller = ImAnnotateController(ax, controller = ImAnnotateController(ax,
plot_function, plot_function,
tuple(model.X.min(0)[:, significant_dims]) + tuple(model.X.max(0)[:, significant_dims]), tuple(X.min(0)[:, significant_dims]) + tuple(X.max(0)[:, significant_dims]),
resolution=resolution, resolution=resolution,
aspect=aspect, aspect=aspect,
cmap=get_cmap('jet'), cmap=get_cmap('jet'),

View file

@ -7,48 +7,13 @@
import numpy as np import numpy as np
from scipy import linalg from scipy import linalg
import types from scipy.linalg import lapack, blas
import ctypes
from ctypes import byref, c_char, c_int, c_double # TODO
import scipy
import warnings
import os
from .config import config from .config import config
import logging import logging
from . import linalg_cython from . import linalg_cython
_scipyversion = np.float64((scipy.__version__).split('.')[:2])
_fix_dpotri_scipy_bug = True
if np.all(_scipyversion >= np.array([0, 14])):
from scipy.linalg import lapack
_fix_dpotri_scipy_bug = False
elif np.all(_scipyversion >= np.array([0, 12])):
#import scipy.linalg.lapack.clapack as lapack
from scipy.linalg import lapack
else:
from scipy.linalg.lapack import flapack as lapack
if config.getboolean('anaconda', 'installed') and config.getboolean('anaconda', 'MKL'):
try:
anaconda_path = str(config.get('anaconda', 'location'))
mkl_rt = ctypes.cdll.LoadLibrary(os.path.join(anaconda_path, 'DLLs', 'mkl_rt.dll'))
dsyrk = mkl_rt.dsyrk
dsyr = mkl_rt.dsyr
_blas_available = True
print('anaconda installed and mkl is loaded')
except:
_blas_available = False
else:
try:
_blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__) # @UndefinedVariable
dsyrk = _blaslib.dsyrk_
dsyr = _blaslib.dsyr_
_blas_available = True
except AttributeError as e:
_blas_available = False
warnings.warn("warning: caught this exception:" + str(e))
def force_F_ordered_symmetric(A): def force_F_ordered_symmetric(A):
""" """
return a F ordered version of A, assuming A is symmetric return a F ordered version of A, assuming A is symmetric
@ -169,9 +134,6 @@ def dpotri(A, lower=1):
:returns: A inverse :returns: A inverse
""" """
if _fix_dpotri_scipy_bug:
assert lower==1, "scipy linalg behaviour is very weird. please use lower, fortran ordered arrays"
lower = 0
A = force_F_ordered(A) A = force_F_ordered(A)
R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug R, info = lapack.dpotri(A, lower=lower) #needs to be zero here, seems to be a scipy bug
@ -300,8 +262,8 @@ def pca(Y, input_dim):
Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False) Z = linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
[X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]] [X, W] = [Z[0][:, 0:input_dim], np.dot(np.diag(Z[1]), Z[2]).T[:, 0:input_dim]]
v = X.std(axis=0) v = X.std(axis=0)
X /= v; X /= v
W *= v; W *= v
return X, W.T return X, W.T
def ppca(Y, Q, iterations=100): def ppca(Y, Q, iterations=100):
@ -347,34 +309,15 @@ def tdot_blas(mat, out=None):
out[:] = 0.0 out[:] = 0.0
# # Call to DSYRK from BLAS # # Call to DSYRK from BLAS
# If already in Fortran order (rare), and has the right sorts of strides I
# could avoid the copy. I also thought swapping to cblas API would allow use
# of C order. However, I tried that and had errors with large matrices:
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
mat = np.asfortranarray(mat) mat = np.asfortranarray(mat)
TRANS = c_char('n'.encode('ascii')) out = blas.dsyrk(alpha=1.0, a=mat, beta=0.0, c=out, overwrite_c=1,
N = c_int(mat.shape[0]) trans=0, lower=0)
K = c_int(mat.shape[1])
LDA = c_int(mat.shape[0])
UPLO = c_char('l'.encode('ascii'))
ALPHA = c_double(1.0)
A = mat.ctypes.data_as(ctypes.c_void_p)
BETA = c_double(0.0)
C = out.ctypes.data_as(ctypes.c_void_p)
LDC = c_int(np.max(out.strides) // 8)
dsyrk(byref(UPLO), byref(TRANS), byref(N), byref(K),
byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
symmetrify(out, upper=True) symmetrify(out, upper=True)
return np.ascontiguousarray(out) return np.ascontiguousarray(out)
def tdot(*args, **kwargs): def tdot(*args, **kwargs):
if _blas_available: return tdot_blas(*args, **kwargs)
return tdot_blas(*args, **kwargs)
else:
return tdot_numpy(*args, **kwargs)
def DSYR_blas(A, x, alpha=1.): def DSYR_blas(A, x, alpha=1.):
""" """
@ -386,15 +329,7 @@ def DSYR_blas(A, x, alpha=1.):
:param alpha: scalar :param alpha: scalar
""" """
N = c_int(A.shape[0]) A = blas.dsyr(lower=0, x=x, a=A, alpha=alpha, overwrite_a=True)
LDA = c_int(A.shape[0])
UPLO = c_char('l'.encode('ascii'))
ALPHA = c_double(alpha)
A_ = A.ctypes.data_as(ctypes.c_void_p)
x_ = x.ctypes.data_as(ctypes.c_void_p)
INCX = c_int(1)
dsyr(byref(UPLO), byref(N), byref(ALPHA),
x_, byref(INCX), A_, byref(LDA))
symmetrify(A, upper=True) symmetrify(A, upper=True)
def DSYR_numpy(A, x, alpha=1.): def DSYR_numpy(A, x, alpha=1.):
@ -411,10 +346,8 @@ def DSYR_numpy(A, x, alpha=1.):
def DSYR(*args, **kwargs): def DSYR(*args, **kwargs):
if _blas_available: return DSYR_blas(*args, **kwargs)
return DSYR_blas(*args, **kwargs)
else:
return DSYR_numpy(*args, **kwargs)
def symmetrify(A, upper=False): def symmetrify(A, upper=False):
""" """