mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-15 06:52:39 +02:00
Merge branch 'devel' of github.com:SheffieldML/GPy into devel
This commit is contained in:
commit
5a6347a6ba
22 changed files with 424 additions and 105 deletions
|
|
@ -285,11 +285,11 @@ class GP(Model):
|
|||
plot_raw=plot_raw, Y_metadata=Y_metadata,
|
||||
data_symbol=data_symbol, **kw)
|
||||
|
||||
def input_sensitivity(self):
|
||||
def input_sensitivity(self, summarize=True):
|
||||
"""
|
||||
Returns the sensitivity for each dimension of this model
|
||||
"""
|
||||
return self.kern.input_sensitivity()
|
||||
return self.kern.input_sensitivity(summarize=summarize)
|
||||
|
||||
def optimize(self, optimizer=None, start=None, **kwargs):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -191,7 +191,7 @@ class Pickleable(object):
|
|||
"""
|
||||
import cPickle as pickle
|
||||
if isinstance(f, str):
|
||||
with open(f, 'w') as f:
|
||||
with open(f, 'wb') as f:
|
||||
pickle.dump(self, f, protocol)
|
||||
else:
|
||||
pickle.dump(self, f, protocol)
|
||||
|
|
@ -954,19 +954,30 @@ class Parameterizable(OptimizationHandlable):
|
|||
if ignore_added_names:
|
||||
self.__dict__[pname] = param
|
||||
return
|
||||
|
||||
def warn_and_retry():
|
||||
print """
|
||||
WARNING: added a parameter with formatted name {},
|
||||
which is already assigned to {}.
|
||||
Trying to change the parameter name to
|
||||
|
||||
{}.{}
|
||||
""".format(pname, self.hierarchy_name(), self.hierarchy_name(), param.name + "_")
|
||||
param.name += "_"
|
||||
self._add_parameter_name(param, ignore_added_names)
|
||||
# and makes sure to not delete programmatically added parameters
|
||||
if pname in self.__dict__:
|
||||
if not (param is self.__dict__[pname]):
|
||||
if pname in self._added_names_:
|
||||
del self.__dict__[pname]
|
||||
self._add_parameter_name(param)
|
||||
else:
|
||||
warn_and_retry()
|
||||
elif pname not in dir(self):
|
||||
self.__dict__[pname] = param
|
||||
self._added_names_.add(pname)
|
||||
else:
|
||||
print "WARNING: added a parameter with formatted name {}, which is already a member of {} object. Trying to change the parameter name to\n {}".format(pname, self.__class__, param.name + "_")
|
||||
param.name += "_"
|
||||
self._add_parameter_name(param, ignore_added_names)
|
||||
warn_and_retry()
|
||||
|
||||
def _remove_parameter_name(self, param=None, pname=None):
|
||||
assert param is None or pname is None, "can only delete either param by name, or the name of a param"
|
||||
|
|
|
|||
|
|
@ -180,7 +180,10 @@ class Parameterized(Parameterizable):
|
|||
:param param: param object to remove from being a parameter of this parameterized object.
|
||||
"""
|
||||
if not param in self.parameters:
|
||||
raise RuntimeError, "Parameter {} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
|
||||
try:
|
||||
raise RuntimeError, "{} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
|
||||
except AttributeError:
|
||||
raise RuntimeError, "{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param))
|
||||
|
||||
start = sum([p.size for p in self.parameters[:param._parent_index_]])
|
||||
self._remove_parameter_name(param)
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class Transformation(object):
|
|||
class Logexp(Transformation):
|
||||
domain = _POSITIVE
|
||||
def f(self, x):
|
||||
return np.where(x>_lim_val, x, np.log(1. + np.exp(np.clip(x, -_lim_val, _lim_val)))) + epsilon
|
||||
return np.where(x>_lim_val, x, np.log1p(np.exp(np.clip(x, -_lim_val, _lim_val)))) + epsilon
|
||||
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
|
||||
def finv(self, f):
|
||||
return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.))
|
||||
|
|
|
|||
|
|
@ -111,7 +111,7 @@ class VariationalPosterior(Parameterized):
|
|||
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
|
||||
return n
|
||||
else:
|
||||
return super(VariationalPrior, self).__getitem__(s)
|
||||
return super(VariationalPosterior, self).__getitem__(s)
|
||||
|
||||
class NormalPosterior(VariationalPosterior):
|
||||
'''
|
||||
|
|
|
|||
|
|
@ -169,14 +169,15 @@ class VarDTC_minibatch(LatentFunctionInference):
|
|||
Kmm = kern.K(Z).copy()
|
||||
diag.add(Kmm, self.const_jitter)
|
||||
Lm = jitchol(Kmm)
|
||||
|
||||
Lambda = Kmm+psi2_full
|
||||
|
||||
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
|
||||
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
|
||||
LL = jitchol(Lambda)
|
||||
LL = np.dot(Lm,LL)
|
||||
b,_ = dtrtrs(LL, psi1Y_full.T)
|
||||
bbt = np.square(b).sum()
|
||||
v,_ = dtrtrs(LL.T,b,lower=False)
|
||||
vvt = np.einsum('md,od->mo',v,v)
|
||||
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
|
||||
|
||||
Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
|
||||
LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
|
||||
|
|
|
|||
|
|
@ -14,6 +14,13 @@ class Add(CombinationKernel):
|
|||
This kernel will take over the active dims of it's subkernels passed in.
|
||||
"""
|
||||
def __init__(self, subkerns, name='add'):
|
||||
for i, kern in enumerate(subkerns[:]):
|
||||
if isinstance(kern, Add):
|
||||
del subkerns[i]
|
||||
for part in kern.parts[::-1]:
|
||||
kern.remove_parameter(part)
|
||||
subkerns.insert(i, part)
|
||||
|
||||
super(Add, self).__init__(subkerns, name)
|
||||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
|
|
@ -118,9 +125,9 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
|
||||
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
|
||||
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
|
||||
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
|
|
@ -135,16 +142,15 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
|
||||
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
return target
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
target_mu = np.zeros(variational_posterior.shape)
|
||||
target_S = np.zeros(variational_posterior.shape)
|
||||
target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
|
||||
for p1 in self.parameters:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
|
|
@ -154,15 +160,14 @@ class Add(CombinationKernel):
|
|||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
target_mu += a
|
||||
target_S += b
|
||||
return target_mu, target_S
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
|
||||
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
[np.add(target_grads[i],grads[i],target_grads[i]) for i in xrange(len(grads))]
|
||||
return target_grads
|
||||
|
||||
def add(self, other, name='sum'):
|
||||
def add(self, other):
|
||||
if isinstance(other, Add):
|
||||
other_params = other.parameters[:]
|
||||
for p in other_params:
|
||||
|
|
@ -173,5 +178,11 @@ class Add(CombinationKernel):
|
|||
self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
|
||||
return self
|
||||
|
||||
def input_sensitivity(self):
|
||||
return reduce(np.add, [k.input_sensitivity() for k in self.parts])
|
||||
def input_sensitivity(self, summarize=True):
|
||||
if summarize:
|
||||
return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts])
|
||||
else:
|
||||
i_s = np.zeros((len(self.parts), self.input_dim))
|
||||
from operator import setitem
|
||||
[setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
|
||||
return i_s
|
||||
|
|
|
|||
|
|
@ -23,9 +23,9 @@ class Kern(Parameterized):
|
|||
|
||||
input_dim:
|
||||
|
||||
is the number of dimensions to work on. Make sure to give the
|
||||
is the number of dimensions to work on. Make sure to give the
|
||||
tight dimensionality of inputs.
|
||||
You most likely want this to be the integer telling the number of
|
||||
You most likely want this to be the integer telling the number of
|
||||
input dimensions of the kernel.
|
||||
If this is not an integer (!) we will work on the whole input matrix X,
|
||||
and not check whether dimensions match or not (!).
|
||||
|
|
@ -134,7 +134,7 @@ class Kern(Parameterized):
|
|||
from ...plotting.matplot_dep import kernel_plots
|
||||
return kernel_plots.plot_ARD(self,*args,**kw)
|
||||
|
||||
def input_sensitivity(self):
|
||||
def input_sensitivity(self, summarize=True):
|
||||
"""
|
||||
Returns the sensitivity for each dimension of this kernel.
|
||||
"""
|
||||
|
|
@ -144,6 +144,9 @@ class Kern(Parameterized):
|
|||
""" Overloading of the '+' operator. for more control, see self.add """
|
||||
return self.add(other)
|
||||
|
||||
def __iadd__(self, other):
|
||||
return self.add(other)
|
||||
|
||||
def add(self, other, name='add'):
|
||||
"""
|
||||
Add another kernel to this one.
|
||||
|
|
@ -235,7 +238,12 @@ class CombinationKernel(Kern):
|
|||
active_dims = np.arange(input_dim)
|
||||
return input_dim, active_dims
|
||||
|
||||
def input_sensitivity(self):
|
||||
def input_sensitivity(self, summarize=True):
|
||||
"""
|
||||
If summize is true, we want to get the summerized view of the sensitivities,
|
||||
otherwise put everything into an array with shape (#kernels, input_dim)
|
||||
in the order of appearance of the kernels in the parameterized object.
|
||||
"""
|
||||
raise NotImplementedError("Choose the kernel you want to get the sensitivity for. You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...")
|
||||
|
||||
def _check_active_dims(self, X):
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class Linear(Kern):
|
|||
self.variances = Param('variances', variances, Logexp())
|
||||
self.add_parameter(self.variances)
|
||||
self.psicomp = PSICOMP_Linear()
|
||||
|
||||
|
||||
@Cache_this(limit=2)
|
||||
def K(self, X, X2=None):
|
||||
if self.ARD:
|
||||
|
|
@ -76,10 +76,12 @@ class Linear(Kern):
|
|||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
if self.ARD:
|
||||
if X2 is None:
|
||||
self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)])
|
||||
#self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)])
|
||||
self.variances.gradient = np.einsum('ij,iq,jq->q', dL_dK, X, X)
|
||||
else:
|
||||
product = X[:, None, :] * X2[None, :, :]
|
||||
self.variances.gradient = (dL_dK[:, :, None] * product).sum(0).sum(0)
|
||||
#product = X[:, None, :] * X2[None, :, :]
|
||||
#self.variances.gradient = (dL_dK[:, :, None] * product).sum(0).sum(0)
|
||||
self.variances.gradient = np.einsum('ij,iq,jq->q', dL_dK, X, X2)
|
||||
else:
|
||||
self.variances.gradient = np.sum(self._dot_product(X, X2) * dL_dK)
|
||||
|
||||
|
|
@ -93,9 +95,10 @@ class Linear(Kern):
|
|||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
if X2 is None:
|
||||
return np.einsum('mq,nm->nq',X*self.variances,dL_dK)+np.einsum('nq,nm->mq',X*self.variances,dL_dK)
|
||||
return np.einsum('jq,q,ij->iq', X, 2*self.variances, dL_dK)
|
||||
else:
|
||||
return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||
#return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
|
||||
return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
return 2.*self.variances*dL_dKdiag[:,None]*X
|
||||
|
|
@ -113,7 +116,6 @@ class Linear(Kern):
|
|||
def psi1(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[1]
|
||||
|
||||
@Cache_this(limit=1)
|
||||
def psi2(self, Z, variational_posterior):
|
||||
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[2]
|
||||
|
||||
|
|
@ -130,7 +132,6 @@ class Linear(Kern):
|
|||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[2:]
|
||||
|
||||
|
||||
class LinearFull(Kern):
|
||||
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
|
||||
super(LinearFull, self).__init__(input_dim, active_dims, name)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import ssrbf_psi_comp
|
|||
import sslinear_psi_comp
|
||||
import linear_psi_comp
|
||||
|
||||
class PSICOMP_RBF(object):
|
||||
class PSICOMP_RBF(Pickleable):
|
||||
|
||||
@Cache_this(limit=2, ignore_args=(0,))
|
||||
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
|
||||
|
|
@ -29,7 +29,7 @@ class PSICOMP_RBF(object):
|
|||
else:
|
||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||
|
||||
class PSICOMP_Linear(object):
|
||||
class PSICOMP_Linear(Pickleable):
|
||||
|
||||
@Cache_this(limit=2, ignore_args=(0,))
|
||||
def psicomputations(self, variance, Z, variational_posterior):
|
||||
|
|
@ -47,4 +47,7 @@ class PSICOMP_Linear(object):
|
|||
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
|
||||
else:
|
||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||
raise ValueError, "unknown distriubtion received for psi-statistics"
|
||||
|
||||
def _setup_observers(self):
|
||||
pass
|
||||
|
|
@ -21,9 +21,7 @@ def psicomputations(variance, Z, variational_posterior):
|
|||
|
||||
psi0 = np.einsum('q,nq->n',variance,np.square(mu)+S)
|
||||
psi1 = np.einsum('q,mq,nq->nm',variance,Z,mu)
|
||||
|
||||
tmp = np.einsum('q,mq,nq->nm',variance,Z,mu)
|
||||
psi2 = np.einsum('q,mq,oq,nq->mo',np.square(variance),Z,Z,S) + np.einsum('nm,no->mo',tmp,tmp)
|
||||
psi2 = np.einsum('q,mq,oq,nq->mo',np.square(variance),Z,Z,S) + np.einsum('nm,no->mo',psi1,psi1)
|
||||
|
||||
return psi0, psi1, psi2
|
||||
|
||||
|
|
@ -58,16 +56,15 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S):
|
|||
|
||||
variance2 = np.square(variance)
|
||||
common_sum = np.einsum('q,mq,nq->nm',variance,Z,mu) # NxM
|
||||
Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z)
|
||||
common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum)
|
||||
|
||||
dL_dvar = np.einsum('mo,nq,q,mq,oq->q',dL_dpsi2,2.*S,variance,Z,Z)+\
|
||||
np.einsum('mo,mq,nq,no->q',dL_dpsi2,Z,mu,common_sum)+\
|
||||
np.einsum('mo,oq,nq,nm->q',dL_dpsi2,Z,mu,common_sum)
|
||||
dL_dvar = np.einsum('q,nq,q->q',Z_expect,2.*S,variance)+ np.einsum('nq,nq->q',common_expect,mu)
|
||||
|
||||
dL_dmu = np.einsum('mo,q,mq,no->nq',dL_dpsi2,variance,Z,common_sum)+\
|
||||
np.einsum('mo,q,oq,nm->nq',dL_dpsi2,variance,Z,common_sum)
|
||||
dL_dmu = np.einsum('nq,q->nq',common_expect,variance)
|
||||
|
||||
dL_dS = np.empty(S.shape)
|
||||
dL_dS[:] = np.einsum('mo,q,mq,oq->q',dL_dpsi2,variance2,Z,Z)
|
||||
dL_dS[:] = np.einsum('q,q->q',Z_expect,variance2)
|
||||
|
||||
dL_dZ = 2.*(np.einsum('om,q,mq,nq->oq',dL_dpsi2,variance2,Z,S)+np.einsum('om,q,nq,nm->oq',dL_dpsi2,variance,mu,common_sum))
|
||||
|
||||
|
|
|
|||
|
|
@ -378,7 +378,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
|
|||
if self.GPU_direct:
|
||||
dL_dpsi1_gpu = dL_dpsi1
|
||||
dL_dpsi2_gpu = dL_dpsi2
|
||||
dL_dpsi0_sum = gpuarray.sum(dL_dpsi0).get()
|
||||
dL_dpsi0_sum = dL_dpsi0.get().sum() #gpuarray.sum(dL_dpsi0).get()
|
||||
else:
|
||||
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
|
||||
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
|
||||
|
|
@ -394,7 +394,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
|
|||
self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
|
||||
self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
|
||||
|
||||
dL_dvar = dL_dpsi0_sum + gpuarray.sum(dvar_gpu).get()
|
||||
dL_dvar = dL_dpsi0_sum + dvar_gpu.get().sum()#gpuarray.sum(dvar_gpu).get()
|
||||
sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
|
||||
dL_dmu = grad_mu_gpu.get()
|
||||
sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
|
||||
|
|
@ -404,7 +404,7 @@ class PSICOMP_RBF_GPU(PSICOMP_RBF):
|
|||
sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
|
||||
dL_dlengscale = grad_l_gpu.get()
|
||||
else:
|
||||
dL_dlengscale = gpuarray.sum(dl_gpu).get()
|
||||
dL_dlengscale = dl_gpu.get().sum() #gpuarray.sum(dl_gpu).get()
|
||||
|
||||
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
|
||||
|
||||
|
|
|
|||
|
|
@ -22,11 +22,8 @@ def psicomputations(variance, Z, variational_posterior):
|
|||
|
||||
psi0 = np.einsum('q,nq,nq->n',variance,gamma,np.square(mu)+S)
|
||||
psi1 = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu)
|
||||
mu2 = np.square(mu)
|
||||
variances2 = np.square(variance)
|
||||
tmp = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu)
|
||||
psi2 = np.einsum('nq,q,mq,oq,nq->mo',gamma,variances2,Z,Z,mu2+S)+\
|
||||
np.einsum('nm,no->mo',tmp,tmp) - np.einsum('nq,q,mq,oq,nq->mo',np.square(gamma),variances2,Z,Z,mu2)
|
||||
psi2 = np.einsum('nq,q,mq,oq,nq->mo',gamma,np.square(variance),Z,Z,(1-gamma)*np.square(mu)+S) +\
|
||||
np.einsum('nm,no->mo',psi1,psi1)
|
||||
|
||||
return psi0, psi1, psi2
|
||||
|
||||
|
|
@ -67,22 +64,20 @@ def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
|
|||
variance2 = np.square(variance)
|
||||
mu2S = mu2+S # NxQ
|
||||
common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
|
||||
|
||||
dL_dvar = np.einsum('mo,nq,q,mq,oq->q',dL_dpsi2,2.*(gamma*mu2S-gamma2*mu2),variance,Z,Z)+\
|
||||
np.einsum('mo,nq,mq,nq,no->q',dL_dpsi2,gamma,Z,mu,common_sum)+\
|
||||
np.einsum('mo,nq,oq,nq,nm->q',dL_dpsi2,gamma,Z,mu,common_sum)
|
||||
Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z)
|
||||
common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum)
|
||||
|
||||
dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\
|
||||
np.einsum('nq,nq,nq->q',common_expect,gamma,mu)
|
||||
|
||||
dL_dgamma = np.einsum('mo,q,mq,oq,nq->nq',dL_dpsi2,variance2,Z,Z,(mu2S-2.*gamma*mu2))+\
|
||||
np.einsum('mo,q,mq,nq,no->nq',dL_dpsi2,variance,Z,mu,common_sum)+\
|
||||
np.einsum('mo,q,oq,nq,nm->nq',dL_dpsi2,variance,Z,mu,common_sum)
|
||||
dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\
|
||||
np.einsum('nq,q,nq->nq',common_expect,variance,mu)
|
||||
|
||||
dL_dmu = np.einsum('mo,q,mq,oq,nq,nq->nq',dL_dpsi2,variance2,Z,Z,mu,2.*(gamma-gamma2))+\
|
||||
np.einsum('mo,nq,q,mq,no->nq',dL_dpsi2,gamma,variance,Z,common_sum)+\
|
||||
np.einsum('mo,nq,q,oq,nm->nq',dL_dpsi2,gamma,variance,Z,common_sum)
|
||||
dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\
|
||||
np.einsum('nq,nq,q->nq',common_expect,gamma,variance)
|
||||
|
||||
dL_dS = np.einsum('mo,nq,q,mq,oq->nq',dL_dpsi2,gamma,variance2,Z,Z)
|
||||
dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2)
|
||||
|
||||
dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,mu2S)+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum)
|
||||
-np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma2,variance2,Z,mu2))
|
||||
dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum))
|
||||
|
||||
return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ
|
||||
|
|
|
|||
|
|
@ -40,6 +40,11 @@ class Static(Kern):
|
|||
K = self.K(variational_posterior.mean, Z)
|
||||
return np.einsum('ij,ik->jk',K,K) #K[:,:,None]*K[:,None,:] # NB. more efficient implementations on inherriting classes
|
||||
|
||||
def input_sensitivity(self, summarize=True):
|
||||
if summarize:
|
||||
return super(Static, self).input_sensitivity(summarize=summarize)
|
||||
else:
|
||||
return np.ones(self.input_dim) * self.variance
|
||||
|
||||
class White(Static):
|
||||
def __init__(self, input_dim, variance=1., active_dims=None, name='white'):
|
||||
|
|
@ -63,7 +68,6 @@ class White(Static):
|
|||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
self.variance.gradient = dL_dpsi0.sum()
|
||||
|
||||
|
||||
class Bias(Static):
|
||||
def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
|
||||
super(Bias, self).__init__(input_dim, variance, active_dims, name)
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@ class Stationary(Kern):
|
|||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
return np.zeros(X.shape)
|
||||
|
||||
def input_sensitivity(self):
|
||||
def input_sensitivity(self, summarize=True):
|
||||
return np.ones(self.input_dim)/self.lengthscale**2
|
||||
|
||||
class Exponential(Stationary):
|
||||
|
|
@ -340,7 +340,7 @@ class RatQuad(Stationary):
|
|||
"""
|
||||
|
||||
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='ExpQuad'):
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='RatQuad'):
|
||||
super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
self.power = Param('power', power, Logexp())
|
||||
self.add_parameters(self.power)
|
||||
|
|
|
|||
|
|
@ -18,3 +18,5 @@ from gp_coregionalized_regression import GPCoregionalizedRegression
|
|||
from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
|
||||
from gp_heteroscedastic_regression import GPHeteroscedasticRegression
|
||||
from ss_mrd import SSMRD
|
||||
from gp_kronecker_gaussian_regression import GPKroneckerGaussianRegression
|
||||
from gp_var_gauss import GPVariationalGaussianApproximation
|
||||
|
|
|
|||
119
GPy/models/gp_kronecker_gaussian_regression.py
Normal file
119
GPy/models/gp_kronecker_gaussian_regression.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
# Copyright (c) 2014, James Hensman, Alan Saul
|
||||
# Distributed under the terms of the GNU General public License, see LICENSE.txt
|
||||
|
||||
import numpy as np
|
||||
from ..core.model import Model
|
||||
from ..core.parameterization import ObsAr
|
||||
from .. import likelihoods
|
||||
|
||||
class GPKroneckerGaussianRegression(Model):
|
||||
"""
|
||||
Kronecker GP regression
|
||||
|
||||
Take two kernels computed on separate spaces K1(X1), K2(X2), and a data
|
||||
matrix Y which is f size (N1, N2).
|
||||
|
||||
The effective covaraince is np.kron(K2, K1)
|
||||
The effective data is vec(Y) = Y.flatten(order='F')
|
||||
|
||||
The noise must be iid Gaussian.
|
||||
|
||||
See Stegle et al.
|
||||
@inproceedings{stegle2011efficient,
|
||||
title={Efficient inference in matrix-variate gaussian models with $\\backslash$ iid observation noise},
|
||||
author={Stegle, Oliver and Lippert, Christoph and Mooij, Joris M and Lawrence, Neil D and Borgwardt, Karsten M},
|
||||
booktitle={Advances in Neural Information Processing Systems},
|
||||
pages={630--638},
|
||||
year={2011}
|
||||
}
|
||||
|
||||
"""
|
||||
def __init__(self, X1, X2, Y, kern1, kern2, noise_var=1., name='KGPR'):
|
||||
Model.__init__(self, name=name)
|
||||
# accept the construction arguments
|
||||
self.X1 = ObsAr(X1)
|
||||
self.X2 = ObsAr(X2)
|
||||
self.Y = Y
|
||||
self.kern1, self.kern2 = kern1, kern2
|
||||
self.add_parameter(self.kern1)
|
||||
self.add_parameter(self.kern2)
|
||||
|
||||
self.likelihood = likelihoods.Gaussian()
|
||||
self.likelihood.variance = noise_var
|
||||
self.add_parameter(self.likelihood)
|
||||
|
||||
self.num_data1, self.input_dim1 = self.X1.shape
|
||||
self.num_data2, self.input_dim2 = self.X2.shape
|
||||
|
||||
assert kern1.input_dim == self.input_dim1
|
||||
assert kern2.input_dim == self.input_dim2
|
||||
assert Y.shape == (self.num_data1, self.num_data2)
|
||||
|
||||
def log_likelihood(self):
|
||||
return self._log_marginal_likelihood
|
||||
|
||||
def parameters_changed(self):
|
||||
(N1, D1), (N2, D2) = self.X1.shape, self.X2.shape
|
||||
K1, K2 = self.kern1.K(self.X1), self.kern2.K(self.X2)
|
||||
|
||||
# eigendecompositon
|
||||
S1, U1 = np.linalg.eigh(K1)
|
||||
S2, U2 = np.linalg.eigh(K2)
|
||||
W = np.kron(S2, S1) + self.likelihood.variance
|
||||
|
||||
Y_ = U1.T.dot(self.Y).dot(U2)
|
||||
|
||||
# store these quantities: needed for prediction
|
||||
Wi = 1./W
|
||||
Ytilde = Y_.flatten(order='F')*Wi
|
||||
|
||||
self._log_marginal_likelihood = -0.5*self.num_data1*self.num_data2*np.log(2*np.pi)\
|
||||
-0.5*np.sum(np.log(W))\
|
||||
-0.5*np.dot(Y_.flatten(order='F'), Ytilde)
|
||||
|
||||
# gradients for data fit part
|
||||
Yt_reshaped = Ytilde.reshape(N1, N2, order='F')
|
||||
tmp = U1.dot(Yt_reshaped)
|
||||
dL_dK1 = .5*(tmp*S2).dot(tmp.T)
|
||||
tmp = U2.dot(Yt_reshaped.T)
|
||||
dL_dK2 = .5*(tmp*S1).dot(tmp.T)
|
||||
|
||||
# gradients for logdet
|
||||
Wi_reshaped = Wi.reshape(N1, N2, order='F')
|
||||
tmp = np.dot(Wi_reshaped, S2)
|
||||
dL_dK1 += -0.5*(U1*tmp).dot(U1.T)
|
||||
tmp = np.dot(Wi_reshaped.T, S1)
|
||||
dL_dK2 += -0.5*(U2*tmp).dot(U2.T)
|
||||
|
||||
self.kern1.update_gradients_full(dL_dK1, self.X1)
|
||||
self.kern2.update_gradients_full(dL_dK2, self.X2)
|
||||
|
||||
# gradients for noise variance
|
||||
dL_dsigma2 = -0.5*Wi.sum() + 0.5*np.sum(np.square(Ytilde))
|
||||
self.likelihood.variance.gradient = dL_dsigma2
|
||||
|
||||
# store these quantities for prediction:
|
||||
self.Wi, self.Ytilde, self.U1, self.U2 = Wi, Ytilde, U1, U2
|
||||
|
||||
def predict(self, X1new, X2new):
|
||||
"""
|
||||
Return the predictive mean and variance at a series of new points X1new, X2new
|
||||
Only returns the diagonal of the predictive variance, for now.
|
||||
|
||||
:param X1new: The points at which to make a prediction
|
||||
:type X1new: np.ndarray, Nnew x self.input_dim1
|
||||
:param X2new: The points at which to make a prediction
|
||||
:type X2new: np.ndarray, Nnew x self.input_dim2
|
||||
|
||||
"""
|
||||
k1xf = self.kern1.K(X1new, self.X1)
|
||||
k2xf = self.kern2.K(X2new, self.X2)
|
||||
A = k1xf.dot(self.U1)
|
||||
B = k2xf.dot(self.U2)
|
||||
mu = A.dot(self.Ytilde.reshape(self.num_data1, self.num_data2, order='F')).dot(B.T).flatten(order='F')
|
||||
k1xx = self.kern1.Kdiag(X1new)
|
||||
k2xx = self.kern2.Kdiag(X2new)
|
||||
BA = np.kron(B, A)
|
||||
var = np.kron(k2xx, k1xx) - np.sum(BA**2*self.Wi, 1) + self.likelihood.variance
|
||||
|
||||
return mu[:, None], var[:, None]
|
||||
108
GPy/models/gp_var_gauss.py
Normal file
108
GPy/models/gp_var_gauss.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
# Copyright (c) 2014, James Hensman, Alan Saul
|
||||
# Distributed under the terms of the GNU General public License, see LICENSE.txt
|
||||
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
from scipy.special import erf
|
||||
from ..core.model import Model
|
||||
from ..core.parameterization import ObsAr
|
||||
from .. import kern
|
||||
from ..core.parameterization.param import Param
|
||||
from ..util.linalg import pdinv
|
||||
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
|
||||
class GPVariationalGaussianApproximation(Model):
|
||||
"""
|
||||
The Variational Gaussian Approximation revisited implementation for regression
|
||||
|
||||
@article{Opper:2009,
|
||||
title = {The Variational Gaussian Approximation Revisited},
|
||||
author = {Opper, Manfred and Archambeau, C{\'e}dric},
|
||||
journal = {Neural Comput.},
|
||||
year = {2009},
|
||||
pages = {786--792},
|
||||
}
|
||||
"""
|
||||
def __init__(self, X, Y, kernel=None):
|
||||
Model.__init__(self,'Variational GP classification')
|
||||
# accept the construction arguments
|
||||
self.X = ObsAr(X)
|
||||
if kernel is None:
|
||||
kernel = kern.RBF(X.shape[1]) + kern.White(X.shape[1], 0.01)
|
||||
self.kern = kernel
|
||||
self.add_parameter(self.kern)
|
||||
self.num_data, self.input_dim = self.X.shape
|
||||
|
||||
self.alpha = Param('alpha', np.zeros(self.num_data))
|
||||
self.beta = Param('beta', np.ones(self.num_data))
|
||||
self.add_parameter(self.alpha)
|
||||
self.add_parameter(self.beta)
|
||||
|
||||
self.gh_x, self.gh_w = np.polynomial.hermite.hermgauss(20)
|
||||
self.Ysign = np.where(Y==1, 1, -1).flatten()
|
||||
|
||||
def log_likelihood(self):
|
||||
"""
|
||||
Marginal log likelihood evaluation
|
||||
"""
|
||||
return self._log_lik
|
||||
|
||||
def likelihood_quadrature(self, m, v):
|
||||
"""
|
||||
Perform Gauss-Hermite quadrature over the log of the likelihood, with a fixed weight
|
||||
"""
|
||||
# assume probit for now.
|
||||
X = self.gh_x[None, :]*np.sqrt(2.*v[:, None]) + (m*self.Ysign)[:, None]
|
||||
p = stats.norm.cdf(X)
|
||||
N = stats.norm.pdf(X)
|
||||
F = np.log(p).dot(self.gh_w)
|
||||
NoverP = N/p
|
||||
dF_dm = (NoverP*self.Ysign[:,None]).dot(self.gh_w)
|
||||
dF_dv = -0.5*(NoverP**2 + NoverP*X).dot(self.gh_w)
|
||||
return F, dF_dm, dF_dv
|
||||
|
||||
def parameters_changed(self):
|
||||
K = self.kern.K(self.X)
|
||||
m = K.dot(self.alpha)
|
||||
KB = K*self.beta[:, None]
|
||||
BKB = KB*self.beta[None, :]
|
||||
A = np.eye(self.num_data) + BKB
|
||||
Ai, LA, _, Alogdet = pdinv(A)
|
||||
Sigma = np.diag(self.beta**-2) - Ai/self.beta[:, None]/self.beta[None, :] # posterior coavairance: need full matrix for gradients
|
||||
var = np.diag(Sigma)
|
||||
|
||||
F, dF_dm, dF_dv = self.likelihood_quadrature(m, var)
|
||||
dF_da = np.dot(K, dF_dm)
|
||||
SigmaB = Sigma*self.beta
|
||||
dF_db = -np.diag(Sigma.dot(np.diag(dF_dv)).dot(SigmaB))*2
|
||||
KL = 0.5*(Alogdet + np.trace(Ai) - self.num_data + m.dot(self.alpha))
|
||||
dKL_da = m
|
||||
A_A2 = Ai - Ai.dot(Ai)
|
||||
dKL_db = np.diag(np.dot(KB.T, A_A2))
|
||||
self._log_lik = F.sum() - KL
|
||||
self.alpha.gradient = dF_da - dKL_da
|
||||
self.beta.gradient = dF_db - dKL_db
|
||||
|
||||
# K-gradients
|
||||
dKL_dK = 0.5*(self.alpha[None, :]*self.alpha[:, None] + self.beta[:, None]*self.beta[None, :]*A_A2)
|
||||
tmp = Ai*self.beta[:, None]/self.beta[None, :]
|
||||
dF_dK = self.alpha[:, None]*dF_dm[None, :] + np.dot(tmp*dF_dv, tmp.T)
|
||||
self.kern.update_gradients_full(dF_dK - dKL_dK, self.X)
|
||||
|
||||
def predict(self, Xnew):
|
||||
"""
|
||||
Predict the function(s) at the new point(s) Xnew.
|
||||
|
||||
:param Xnew: The points at which to make a prediction
|
||||
:type Xnew: np.ndarray, Nnew x self.input_dim
|
||||
"""
|
||||
Wi, _, _, _ = pdinv(self.kern.K(self.X) + np.diag(self.beta**-2))
|
||||
Kux = self.kern.K(self.X, Xnew)
|
||||
mu = np.dot(Kux.T, self.alpha)
|
||||
WiKux = np.dot(Wi, Kux)
|
||||
Kxx = self.kern.Kdiag(Xnew)
|
||||
var = Kxx - np.sum(WiKux*Kux, 0)
|
||||
|
||||
return 0.5*(1+erf(mu/np.sqrt(2.*(var+1))))
|
||||
|
|
@ -23,8 +23,8 @@ class SSGPLVM(SparseGP):
|
|||
:type init: 'PCA'|'random'
|
||||
|
||||
"""
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, **kwargs):
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True, **kwargs):
|
||||
|
||||
self.mpi_comm = mpi_comm
|
||||
self.__IN_OPTIMIZATION__ = False
|
||||
|
|
@ -41,17 +41,17 @@ class SSGPLVM(SparseGP):
|
|||
if X_variance is None: # The variance of the variational approximation (S)
|
||||
X_variance = np.random.uniform(0,.1,X.shape)
|
||||
|
||||
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
|
||||
gamma[gamma>1.-1e-9] = 1.-1e-9
|
||||
gamma[gamma<1e-9] = 1e-9
|
||||
if Gamma is None:
|
||||
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
|
||||
gamma[gamma>1.-1e-9] = 1.-1e-9
|
||||
gamma[gamma<1e-9] = 1e-9
|
||||
else:
|
||||
gamma = Gamma.copy()
|
||||
|
||||
if Z is None:
|
||||
Z = np.random.permutation(X.copy())[:num_inducing]
|
||||
assert Z.shape[1] == X.shape[1]
|
||||
|
||||
pi = np.empty((input_dim))
|
||||
pi[:] = 0.5
|
||||
|
||||
if likelihood is None:
|
||||
likelihood = Gaussian()
|
||||
|
|
@ -64,7 +64,10 @@ class SSGPLVM(SparseGP):
|
|||
if inference_method is None:
|
||||
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
|
||||
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=True) # the prior probability of the latent binary variable b
|
||||
if pi is None:
|
||||
pi = np.empty((input_dim))
|
||||
pi[:] = 0.5
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi) # the prior probability of the latent binary variable b
|
||||
|
||||
X = SpikeAndSlabPosterior(X, X_variance, gamma)
|
||||
|
||||
|
|
|
|||
|
|
@ -30,18 +30,18 @@ def add_bar_labels(fig, ax, bars, bottom=0):
|
|||
c = 'k'
|
||||
transform = transOffsetUp
|
||||
ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform)
|
||||
|
||||
|
||||
ax.set_xticks([])
|
||||
|
||||
|
||||
def plot_bars(fig, ax, x, ard_params, color, name, bottom=0):
|
||||
from ...util.misc import param_to_array
|
||||
return ax.bar(left=x, height=param_to_array(ard_params), width=.8,
|
||||
bottom=bottom, align='center',
|
||||
color=color, edgecolor='k', linewidth=1.2,
|
||||
return ax.bar(left=x, height=param_to_array(ard_params), width=.8,
|
||||
bottom=bottom, align='center',
|
||||
color=color, edgecolor='k', linewidth=1.2,
|
||||
label=name.replace("_"," "))
|
||||
|
||||
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
|
||||
def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False, filtering=None):
|
||||
"""
|
||||
If an ARD kernel is present, plot a bar representation using matplotlib
|
||||
|
||||
|
|
@ -51,6 +51,10 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
|
|||
title of the plot,
|
||||
pass '' to not print a title
|
||||
pass None for a generic title
|
||||
:param filtering: list of names, which to use for plotting ARD parameters.
|
||||
Only kernels which match names in the list of names in filtering
|
||||
will be used for plotting.
|
||||
:type filtering: list of names to use for ARD plot
|
||||
"""
|
||||
fig, ax = ax_default(fignum,ax)
|
||||
|
||||
|
|
@ -58,19 +62,25 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
|
|||
ax.set_title('ARD parameters, %s kernel' % kernel.name)
|
||||
else:
|
||||
ax.set_title(title)
|
||||
|
||||
|
||||
Tango.reset()
|
||||
bars = []
|
||||
|
||||
ard_params = np.atleast_2d(kernel.input_sensitivity())
|
||||
|
||||
ard_params = np.atleast_2d(kernel.input_sensitivity(summarize=False))
|
||||
bottom = 0
|
||||
x = np.arange(kernel.input_dim)
|
||||
|
||||
|
||||
if order is None:
|
||||
order = kernel.parameter_names(recursive=False)
|
||||
|
||||
for i in range(ard_params.shape[0]):
|
||||
c = Tango.nextMedium()
|
||||
bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel.parameters[i].name, bottom=bottom))
|
||||
bottom += ard_params[i,:]
|
||||
|
||||
if kernel.parameters[i].name in order:
|
||||
c = Tango.nextMedium()
|
||||
bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel.parameters[i].name, bottom=bottom))
|
||||
bottom += ard_params[i,:]
|
||||
else:
|
||||
print "filtering out {}".format(kernel.parameters[i].name)
|
||||
|
||||
ax.set_xlim(-.5, kernel.input_dim - .5)
|
||||
add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[i,:])
|
||||
|
||||
|
|
|
|||
|
|
@ -423,6 +423,45 @@ class GradientTests(np.testing.TestCase):
|
|||
m = GPy.models.GPHeteroscedasticRegression(X,Y,kern)
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
def test_gp_kronecker_gaussian(self):
|
||||
N1, N2 = 30, 20
|
||||
X1 = np.random.randn(N1, 1)
|
||||
X2 = np.random.randn(N2, 1)
|
||||
X1.sort(0); X2.sort(0)
|
||||
k1 = GPy.kern.RBF(1) # + GPy.kern.White(1)
|
||||
k2 = GPy.kern.RBF(1) # + GPy.kern.White(1)
|
||||
Y = np.random.randn(N1, N2)
|
||||
m = GPy.models.GPKroneckerGaussianRegression(X1, X2, Y, k1, k2)
|
||||
|
||||
# build the model the dumb way
|
||||
assert (N1*N2<1000), "too much data for standard GPs!"
|
||||
yy, xx = np.meshgrid(X2, X1)
|
||||
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
|
||||
kg = GPy.kern.RBF(1, active_dims=[0]) * GPy.kern.RBF(1, active_dims=[1])
|
||||
mm = GPy.models.GPRegression(Xgrid, Y.reshape(-1, 1, order='F'), kernel=kg)
|
||||
|
||||
m.randomize()
|
||||
mm[:] = m[:]
|
||||
assert np.allclose(m.log_likelihood(), mm.log_likelihood())
|
||||
assert np.allclose(m.gradient, mm.gradient)
|
||||
X1test = np.random.randn(100, 1)
|
||||
X2test = np.random.randn(100, 1)
|
||||
mean1, var1 = m.predict(X1test, X2test)
|
||||
yy, xx = np.meshgrid(X2test, X1test)
|
||||
Xgrid = np.vstack((xx.flatten(order='F'), yy.flatten(order='F'))).T
|
||||
mean2, var2 = mm.predict(Xgrid)
|
||||
assert np.allclose(mean1, mean2)
|
||||
assert np.allclose(var1, var2)
|
||||
|
||||
def test_gp_VGPC(self):
|
||||
num_obs = 25
|
||||
X = np.random.randint(0,140,num_obs)
|
||||
X = X[:,None]
|
||||
Y = 25. + np.sin(X/20.) * 2. + np.random.rand(num_obs)[:,None]
|
||||
kern = GPy.kern.Bias(1) + GPy.kern.RBF(1)
|
||||
m = GPy.models.GPVariationalGaussianApproximation(X,Y,kern)
|
||||
self.assertTrue(m.checkgrad())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print "Running unit tests, please be (very) patient..."
|
||||
|
|
|
|||
|
|
@ -33,13 +33,15 @@ class Cacher(object):
|
|||
"""returns the self.id of an object, to be used in caching individual self.ids"""
|
||||
return hex(id(obj))
|
||||
|
||||
def combine_inputs(self, args, kw):
|
||||
def combine_inputs(self, args, kw, ignore_args):
|
||||
"Combines the args and kw in a unique way, such that ordering of kwargs does not lead to recompute"
|
||||
return args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
|
||||
inputs= args + tuple(c[1] for c in sorted(kw.items(), key=lambda x: x[0]))
|
||||
# REMOVE the ignored arguments from input and PREVENT it from being checked!!!
|
||||
return [a for i,a in enumerate(inputs) if i not in ignore_args]
|
||||
|
||||
def prepare_cache_id(self, combined_args_kw, ignore_args):
|
||||
"get the cacheid (conc. string of argument self.ids in order) ignoring ignore_args"
|
||||
cache_id = "".join(self.id(a) for i, a in enumerate(combined_args_kw) if i not in ignore_args)
|
||||
def prepare_cache_id(self, combined_args_kw):
|
||||
"get the cacheid (conc. string of argument self.ids in order)"
|
||||
cache_id = "".join(self.id(a) for a in combined_args_kw)
|
||||
return cache_id
|
||||
|
||||
def ensure_cache_length(self, cache_id):
|
||||
|
|
@ -95,10 +97,12 @@ class Cacher(object):
|
|||
return self.operation(*args, **kw)
|
||||
|
||||
# 2: prepare_cache_id and get the unique self.id string for this call
|
||||
inputs = self.combine_inputs(args, kw)
|
||||
cache_id = self.prepare_cache_id(inputs, self.ignore_args)
|
||||
inputs = self.combine_inputs(args, kw, self.ignore_args)
|
||||
cache_id = self.prepare_cache_id(inputs)
|
||||
# 2: if anything is not cachable, we will just return the operation, without caching
|
||||
if reduce(lambda a, b: a or (not (isinstance(b, Observable) or b is None)), inputs, False):
|
||||
#print 'WARNING: '+self.operation.__name__ + ' not cacheable!'
|
||||
#print [not (isinstance(b, Observable)) for b in inputs]
|
||||
return self.operation(*args, **kw)
|
||||
# 3&4: check whether this cache_id has been cached, then has it changed?
|
||||
try:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue